The Computer Language
Benchmarks Game

k-nucleotide C# .NET Core #3 program

source code

/* The Computer Language Benchmarks Game
   http://benchmarksgame.alioth.debian.org/
 *
 * contributed by Isaac Gouy
 * modified by Antti Lankila for generics
 * modified by A.Nahr for performance and throwing unnecessary stuff out
 * remove unsafe code & add multithread by The Anh Tran
 */

using System;
using System.IO;
using System.Collections.Generic;
using System.Text;
using System.Threading;

/// <summary>
/// k-nucleotide benchmark: reads a FASTA stream from standard input, extracts
/// the sequence that follows the ">THREE" header, and prints k-mer frequency
/// tables and occurrence counts for a fixed set of fragments.
/// </summary>
public class knucleotide
{
    // Number of independent report sections; sizes both the task counter and the result slots.
    private const int TaskCount = 7;

    // Initial capacity of the buffer that accumulates the sequence (64 MB).
    private const int InitialBufferCapacity = 64 * 1024 * 1024;

    // The concatenated sequence lines; written once in Main before any worker starts.
    private static string input;

    // Remaining-task counter shared by the workers; decremented atomically.
    private static int task_count = TaskCount;

    // One output slot per task; each slot is written by exactly one worker.
    private static string[] result = new string[TaskCount];

    /// <summary>
    /// Program entry point: loads the sequence, runs the tasks on one thread
    /// per processor, then prints the results in task-index order.
    /// </summary>
    public static void Main(/*string[] arg*/)
    {
        // Dispose the reader (and the underlying stdin stream) once the input is consumed.
        using (StreamReader source = new StreamReader(Console.OpenStandardInput()))
        {
            input = ReadThirdSequence(source);
        }

        Thread[] threads = new Thread[Environment.ProcessorCount];
        for (int i = 0; i < threads.Length; i++)
        {
            threads[i] = new Thread(worker);
            threads[i].Start();
        }

        foreach (Thread t in threads)
        {
            t.Join();
        }

        foreach (string s in result)
        {
            Console.Out.WriteLine(s);
        }
    }

    /// <summary>
    /// Skips lines up to and including the first one starting with ">THREE"
    /// (case-insensitive), then concatenates every remaining line.
    /// </summary>
    /// <param name="source">Reader positioned at the start of the FASTA data.</param>
    /// <returns>The concatenated lines, or an empty string if the header is never found.</returns>
    private static string ReadThirdSequence(TextReader source)
    {
        string line;
        while ((line = source.ReadLine()) != null)
        {
            // The header is a fixed ASCII marker, so compare ordinally rather than by culture.
            if (line.StartsWith(">THREE", StringComparison.OrdinalIgnoreCase))
            {
                break;
            }
        }

        StringBuilder buf = new StringBuilder(InitialBufferCapacity);
        while ((line = source.ReadLine()) != null)
        {
            buf.Append(line);
        }

        return buf.ToString();
    }

    /// <summary>
    /// Thread body: repeatedly claims the next task index by atomically
    /// decrementing <c>task_count</c> and stores that task's report in
    /// <c>result</c>. Indices are handed out from 6 down to 0.
    /// </summary>
    private static void worker()
    {
        int j;
        while ((j = Interlocked.Decrement(ref task_count)) >= 0)
        {
            switch (j)
            {
                case 0:
                    result[j] = WriteFrequencies(1);
                    break;
                case 1:
                    result[j] = WriteFrequencies(2);
                    break;
                case 2:
                    result[j] = WriteCount("ggt");
                    break;
                case 3:
                    result[j] = WriteCount("ggta");
                    break;
                case 4:
                    result[j] = WriteCount("ggtatt");
                    break;
                case 5:
                    result[j] = WriteCount("ggtattttaatt");
                    break;
                case 6:
                    result[j] = WriteCount("ggtattttaatttatagt");
                    break;
            }
        }
    }

    /// <summary>
    /// Builds the frequency table for all fragments of the given length.
    /// </summary>
    /// <param name="nucleotideLength">Fragment length in characters.</param>
    /// <returns>
    /// One "FRAGMENT percent" line per distinct fragment (three decimals, each
    /// line terminated by '\n'), ordered by descending count and then by fragment.
    /// </returns>
    private static string WriteFrequencies(int nucleotideLength)
    {
        Dictionary<Key, Value> frequencies = GenerateFrequencies(nucleotideLength);

        List<KeyValuePair<Key, Value>> items = new List<KeyValuePair<Key, Value>>(frequencies);
        items.Sort(SortByFrequencyAndCode);

        StringBuilder buf = new StringBuilder();
        // Total number of frames of this length in the input.
        int sum = input.Length - nucleotideLength + 1;

        foreach (KeyValuePair<Key, Value> element in items)
        {
            float percent = element.Value.value * 100.0f / sum;
            // Invariant culture keeps '.' as the decimal separator on every locale.
            buf.AppendFormat(
                System.Globalization.CultureInfo.InvariantCulture,
                "{0} {1:f3}\n",
                element.Key,
                percent);
        }

        return buf.ToString();
    }

    /// <summary>
    /// Counts how many times a specific fragment occurs in the input.
    /// </summary>
    /// <param name="nucleotideFragment">Fragment to look for, in the same case as the input.</param>
    /// <returns>"count&lt;TAB&gt;FRAGMENT" with the fragment upper-cased.</returns>
    private static string WriteCount(string nucleotideFragment)
    {
        Dictionary<Key, Value> frequencies = GenerateFrequencies(nucleotideFragment.Length);
        Key specific = new Key(nucleotideFragment);

        // Single lookup; a fragment that never occurs reports 0.
        Value found;
        int count = frequencies.TryGetValue(specific, out found) ? found.value : 0;

        return string.Format(
            System.Globalization.CultureInfo.InvariantCulture,
            "{0}\t{1}",
            count,
            nucleotideFragment.ToUpperInvariant());
    }

    /// <summary>
    /// Counts every fragment of <paramref name="frame_size"/> characters found
    /// by sliding a window one character at a time over the input.
    /// </summary>
    /// <param name="frame_size">Window length in characters.</param>
    /// <returns>Map from fragment to its occurrence count; empty if the input is shorter than the window.</returns>
    private static Dictionary<Key, Value> GenerateFrequencies(int frame_size)
    {
        Dictionary<Key, Value> frequencies = new Dictionary<Key, Value>();

        // Scratch key re-filled for every window; only copies of it are stored in the dictionary.
        Key k = new Key(frame_size);
        Value val;

        int end = input.Length - frame_size + 1;
        for (int index = 0; index < end; index++)
        {
            k.ReHash(input, index);

            // Value is a reference type so an existing count can be bumped in place
            // without a second dictionary lookup.
            if (frequencies.TryGetValue(k, out val))
            {
                val.value++;
            }
            else
            {
                frequencies.Add(new Key(k), new Value());
            }
        }

        return frequencies;
    }

    /// <summary>
    /// Comparison for sorting frequency entries: higher count first, ties
    /// broken by the fragment text in ordinal order.
    /// </summary>
    private static int SortByFrequencyAndCode(KeyValuePair<Key, Value> item1, KeyValuePair<Key, Value> item2)
    {
        int comparison = item2.Value.value.CompareTo(item1.Value.value);
        if (comparison != 0)
        {
            return comparison;
        }

        // Compare the fragment text itself; calling ToString() on the char[] field
        // would yield the array's type name and make every tie compare equal.
        return string.CompareOrdinal(item1.Key.ToString(), item2.Key.ToString());
    }

    /// <summary>
    /// A fixed-length fragment with a cached hash code, usable as a dictionary key.
    /// Instances are mutable through <see cref="ReHash"/>; never re-hash an
    /// instance that is already stored in a dictionary.
    /// </summary>
    internal class Key : IEquatable<Key>
    {
        internal int    hash;
        internal char[] key;

        /// <summary>Creates an empty key able to hold <paramref name="frame"/> characters.</summary>
        public Key(int frame)
        {
            key = new char[frame];
        }

        /// <summary>Creates an independent copy of <paramref name="k"/> (characters and hash).</summary>
        public Key(Key k)
        {
            hash = k.hash;
            this.key = (char[])k.key.Clone();
        }

        /// <summary>Creates a key holding all characters of <paramref name="s"/>.</summary>
        public Key(string s)
        {
            key = new char[s.Length];
            ReHash(s, 0);
        }

        /// <summary>
        /// Loads this key with the characters of <paramref name="k"/> starting at
        /// <paramref name="offset"/> and recomputes the hash (multiplier 31,
        /// wrapping on overflow).
        /// </summary>
        public void ReHash(string k, int offset)
        {
            hash = 0;
            for (int i = 0; i < key.Length; i++)
            {
                key[i] = k[offset + i];
                hash = hash * 31 + key[i];
            }
        }

        public override int GetHashCode()
        {
            return hash;
        }

        /// <summary>Returns the fragment text in upper case.</summary>
        public override string ToString()
        {
            return new string(key).ToUpperInvariant();
        }

        public override bool Equals(object k)
        {
            return Equals(k as Key);
        }

        /// <summary>
        /// Two keys are equal only when they hold the same characters. The hash
        /// is checked first as a cheap rejection test, but equal hashes alone
        /// are not sufficient because distinct fragments can collide.
        /// </summary>
        public bool Equals(Key k)
        {
            if (ReferenceEquals(k, null))
            {
                return false;
            }

            if (ReferenceEquals(this, k))
            {
                return true;
            }

            if (hash != k.hash || key.Length != k.key.Length)
            {
                return false;
            }

            for (int i = 0; i < key.Length; i++)
            {
                if (key[i] != k.key[i])
                {
                    return false;
                }
            }

            return true;
        }
    }

    /// <summary>Mutable occurrence counter; a new instance starts at 1 (the first sighting).</summary>
    internal class Value
    {
        internal int value;

        public Value()
        {
            value = 1;
        }
    }
}

    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
2.0.2 a04b4bf512
"System.GC.Server": true


Fri, 27 Oct 2017 00:24:53 GMT

MAKE:
cp knucleotide.csharpcore-3.csharpcore Program.cs
cp Include/csharpcore/tmp.csproj .
cp Include/csharpcore/runtimeconfig.template.json .
mkdir obj
cp Include/csharpcore/tmp.csproj.nuget.g.props ./obj
cp Include/csharpcore/tmp.csproj.nuget.g.targets ./obj
/usr/bin/dotnet build -c Release
Microsoft (R) Build Engine version 15.4.8.50001 for .NET Core
Copyright (C) Microsoft Corporation. All rights reserved.

  tmp -> /home/dunham/benchmarksgame_quadcore/knucleotide/tmp/bin/Release/netcoreapp2.0/tmp.dll

Build succeeded.
    0 Warning(s)
    0 Error(s)

Time Elapsed 00:00:03.74

6.31s to complete and log all make actions

COMMAND LINE:
/usr/bin/dotnet ./bin/Release/netcoreapp2.0/tmp.dll 0 < knucleotide-input25000000.txt

PROGRAM OUTPUT:
A 30.295
T 30.151
C 19.800
G 19.754

AA 9.177
TA 9.132
AT 9.131
TT 9.091
CA 6.002
AC 6.001
AG 5.987
GA 5.984
CT 5.971
TC 5.971
GT 5.957
TG 5.956
CC 3.917
GC 3.911
CG 3.909
GG 3.902

1471758	GGT
446535	GGTA
47336	GGTATT
893	GGTATTTTAATT
893	GGTATTTTAATTTATAGT