The Computer Language
Benchmarks Game

k-nucleotide C# .NET Core #2 program

source code

/* The Computer Language Benchmarks Game
   http://benchmarksgame.alioth.debian.org/
 *
 * contributed by Isaac Gouy
 * modified by Antti Lankila for generics
 */

using System;
using System.IO;
using System.Collections.Generic;
using System.Text;

public class program {
    /* Header prefix of the FASTA record whose sequence is analysed. */
    private const string TargetHeader = ">THREE";

    /*
     * Entry point. Reads FASTA-formatted text from standard input, skips
     * forward to the record whose header line starts with ">THREE",
     * collects that record's sequence lines (upper-cased) up to the next
     * header line or end of input, and then prints:
     *   - the frequency tables for fragments of length 1 and 2, and
     *   - the occurrence counts of five fixed fragments.
     *
     * args: command-line arguments; not used.
     */
    public static void Main(string[] args) {
        StringBuilder input = new StringBuilder();

        /* Dispose the reader deterministically once the input is consumed. */
        using (StreamReader source = new StreamReader(Console.OpenStandardInput())) {
            string line;

            /* Skip everything up to and including the target header.
               StartsWith copes with empty lines and with headers shorter
               than the marker, both of which crashed the indexer/Substring
               based test. */
            while ((line = source.ReadLine()) != null) {
                if (line.StartsWith(TargetHeader, StringComparison.Ordinal))
                    break;
            }

            while ((line = source.ReadLine()) != null) {
                /* A blank line carries no sequence data and has no first
                   character to inspect. */
                if (line.Length == 0)
                    continue;

                char c = line[0];
                if (c == '>')
                    break;          /* next record starts here */
                if (c != ';')       /* lines starting with ';' are skipped */
                    input.Append(line.ToUpperInvariant());
            }
        }

        KNucleotide kn = new KNucleotide(input.ToString());
        input = null;   /* drop the builder so its buffer can be collected */

        kn.WriteFrequencies(1);
        kn.WriteFrequencies(2);

        kn.WriteCount("GGT");
        kn.WriteCount("GGTA");
        kn.WriteCount("GGTATT");
        kn.WriteCount("GGTATTTTAATT");
        kn.WriteCount("GGTATTTTAATTTATAGT");
    }
}

/*
 * Counts how often each fixed-length fragment (k-nucleotide) occurs in a
 * sequence string and writes the results to the console.
 */
public class KNucleotide {
    /* Mutable counter stored in the dictionary. Because it is a reference
       type, incrementing an existing entry needs a single lookup instead of
       the get-then-set that freq[key]++ on an int value would perform. */
    internal class Value {
        internal int v;

        internal Value(int v)
        {
            this.v = v;
        }
    }

    /* Fragment -> occurrence counter for the most recently requested length. */
    private readonly Dictionary<string, Value> frequencies = new Dictionary<string, Value>();
    private readonly string sequence;

    /*
     * s: the sequence to analyse.
     * Throws ArgumentNullException when s is null, instead of failing later
     * with a NullReferenceException on first use.
     */
    public KNucleotide(string s)
    {
        if (s == null)
            throw new ArgumentNullException("s");
        sequence = s;
    }

    /*
     * Writes one line per distinct fragment of the given length in the form
     * "<fragment> <percentage with three decimals>", ordered by descending
     * count and then by fragment, followed by an empty line.
     */
    public void WriteFrequencies(int nucleotideLength) {
        GenerateFrequencies(nucleotideLength);

        List<KeyValuePair<string, Value>> items = new List<KeyValuePair<string, Value>>(frequencies);
        items.Sort(SortByFrequencyAndCode);

        /* Number of fragment start positions in the sequence. */
        int sum = sequence.Length - nucleotideLength + 1;
        foreach (KeyValuePair<string, Value> each in items) {
            double percent = each.Value.v * 100.0 / sum;
            /* Format with the invariant culture so the decimal separator is
               always '.', whatever the machine's regional settings are. */
            Console.WriteLine(string.Format(
                System.Globalization.CultureInfo.InvariantCulture,
                "{0} {1:f3}", each.Key, percent));
        }
        Console.WriteLine("");
    }

    /*
     * Writes "<count>\t<fragment>" where count is the number of occurrences
     * of nucleotideFragment in the sequence (0 when it never occurs).
     */
    public void WriteCount(string nucleotideFragment) {
        GenerateFrequencies(nucleotideFragment.Length);

        int count = 0;
        Value found;
        if (frequencies.TryGetValue(nucleotideFragment, out found))
            count = found.v;
        Console.WriteLine("{0}\t{1}", count, nucleotideFragment);
    }

    /* Rebuilds the frequency table for fragments of the given length. The
       reading frames 0..length-1, each stepping by length, together visit
       every start position exactly once. */
    private void GenerateFrequencies(int length) {
        frequencies.Clear();
        for (int frame = 0; frame < length; frame++)
            KFrequency(frame, length);
    }

    /* Counts the fragments of length k that start at readingFrame,
       readingFrame + k, readingFrame + 2k, ... */
    private void KFrequency(int readingFrame, int k) {
        int n = sequence.Length - k + 1;
        for (int i = readingFrame; i < n; i += k) {
            /* string.Substring allocates a new string per fragment. */
            string knucleo = sequence.Substring(i, k);
            Value counter;
            /* One lookup for the common case; no exception is used to
               detect the first occurrence of a fragment. */
            if (frequencies.TryGetValue(knucleo, out counter))
                counter.v++;
            else
                frequencies.Add(knucleo, new Value(1));
        }
    }

    /* Orders by count, highest first; ties are broken by ordinal fragment
       order so the result does not depend on the current culture. */
    private static int SortByFrequencyAndCode(KeyValuePair<string, Value> item1, KeyValuePair<string, Value> item2) {
        int comparison = item2.Value.v.CompareTo(item1.Value.v);
        if (comparison != 0)
            return comparison;
        return string.CompareOrdinal(item1.Key, item2.Key);
    }
}
    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
2.0.2 a04b4bf512
"System.GC.Server": true


Fri, 27 Oct 2017 00:29:08 GMT

MAKE:
cp knucleotide.csharpcore-2.csharpcore Program.cs
cp Include/csharpcore/tmp.csproj .
cp Include/csharpcore/runtimeconfig.template.json .
mkdir obj
cp Include/csharpcore/tmp.csproj.nuget.g.props ./obj
cp Include/csharpcore/tmp.csproj.nuget.g.targets ./obj
/usr/bin/dotnet build -c Release
Microsoft (R) Build Engine version 15.4.8.50001 for .NET Core
Copyright (C) Microsoft Corporation. All rights reserved.

  tmp -> /home/dunham/benchmarksgame_quadcore/knucleotide/tmp/bin/Release/netcoreapp2.0/tmp.dll

Build succeeded.
    0 Warning(s)
    0 Error(s)

Time Elapsed 00:00:03.55

6.11s to complete and log all make actions

COMMAND LINE:
/usr/bin/dotnet ./bin/Release/netcoreapp2.0/tmp.dll 0 < knucleotide-input25000000.txt

PROGRAM OUTPUT:
A 30.295
T 30.151
C 19.800
G 19.754

AA 9.177
TA 9.132
AT 9.131
TT 9.091
CA 6.002
AC 6.001
AG 5.987
GA 5.984
CT 5.971
TC 5.971
GT 5.957
TG 5.956
CC 3.917
GC 3.911
CG 3.909
GG 3.902

1471758	GGT
446535	GGTA
47336	GGTATT
893	GGTATTTTAATT
893	GGTATTTTAATTTATAGT