The Computer Language
Benchmarks Game

k-nucleotide Matz's Interpreter #7 program

source code

# The Computer Language Benchmarks Game
# http://benchmarksgame.alioth.debian.org
#
# contributed by Aaron Tavistock

# Counts substrings of @seq for every distinct key length in +keys+ and
# stores the combined counts in @frequencies.
#
# One thread is started per distinct key length; each thread calls
# key_frequency(key_length, @seq) and hands its result hash back to the
# caller instead of writing into shared state.
#
# @param keys [Array<String>] k-mers of interest; only their lengths are used
# @return [Hash] the merged counts (the same object as @frequencies)
def find_frequencies(keys)
  @frequencies = {}

  workers = keys.map(&:size).uniq.map do |key_length|
    Thread.new { key_frequency(key_length, @seq) }
  end

  # Thread#value joins the thread and returns the block's result (re-raising
  # any exception from it), so @frequencies is only ever mutated from the
  # calling thread rather than concurrently from the workers.
  workers.each { |worker| @frequencies.merge!(worker.value) }
  @frequencies
end

# Runs original_key_frequency(key_length, seq) in a forked child process and
# returns its result to the parent.
#
# The child serializes the result with Marshal onto a pipe; the parent reads
# it back. The data only ever comes from our own child, so Marshal.load is
# not being fed external input here.
#
# @param key_length [Integer] length of the substrings to count
# @param seq [String] sequence to scan
# @return [Object] whatever original_key_frequency returned in the child
# @raise [EOFError] if the child exits without writing a result
def forking_key_frequency(key_length, seq)
  reader, writer = IO.pipe

  pid = Process.fork do
    begin
      reader.close
      Marshal.dump(original_key_frequency(key_length, seq), writer)
    ensure
      writer.close
    end
  end

  # The parent must drop its copy of the write end, otherwise the read below
  # would never see EOF.
  writer.close
  begin
    Marshal.load(reader)
  ensure
    # Close the read end first so a child still writing cannot block forever,
    # then always reap the child -- even when loading failed -- so no zombie
    # process is left behind.
    reader.close
    Process.waitpid(pid)
  end
end

# Counts every substring of +key_length+ bytes in +seq+, sliding one byte at
# a time.
#
# @param key_length [Integer] length in bytes of the substrings to count
# @param seq [String] sequence to scan
# @return [Hash{String => Integer}] substring => occurrences; the hash has a
#   default of 0. Empty when +seq+ is shorter than +key_length+.
def key_frequency(key_length, seq)
  counts = Hash.new(0)
  # Offsets are bytes (byteslice), so the bound is computed in bytes too.
  last_start = seq.bytesize - key_length
  start_index = 0
  # Inclusive bound: the substring starting at last_start is the final one
  # that still fits, and it must be counted as well.
  while start_index <= last_start
    counts[seq.byteslice(start_index, key_length)] += 1
    start_index += 1
  end
  counts
end

# Looks up the recorded count for each of +keys+ in @frequencies.
#
# @param keys [Array<String>] k-mers to look up
# @return [Array<Array(String, Integer)>] [key, count] pairs in the order of
#   +keys+; a key that was never counted is reported with a count of 0
def frequency(keys)
  # fetch with a default: a k-mer that does not occur has no entry in
  # @frequencies, and callers do arithmetic / printing on the count.
  keys.map { |key| [key, @frequencies.fetch(key, 0)] }
end

# Formats the counts of +keys+ as percentages of @seq.size, highest count
# first.
#
# @param keys [Array<String>] k-mers to report
# @return [Array<String>] lines of the form "KEY 12.345" (key upcased,
#   three decimals)
def percentage(keys)
  ranked = frequency(keys).sort { |left, right| right[1] <=> left[1] }
  ranked.map do |key, value|
    share = (value * 100).to_f / @seq.size
    format("%s %.3f", key.upcase, share)
  end
end

# Formats the counts of +keys+ as tab-separated lines, shortest key first.
#
# @param keys [Array<String>] k-mers to report
# @return [Array<String>] lines of the form "COUNT\tKEY" (key upcased)
def count(keys)
  by_length = frequency(keys).sort_by { |key, _value| key.size }
  by_length.map { |key, value| "#{value}\t#{key.upcase}" }
end

# Reads all of STDIN and stores everything after the first occurrence of
# +marker+, with newlines and spaces removed, as the frozen string @seq.
#
# @param marker [String] text that immediately precedes the sequence data
# @return [String] the frozen sequence (the same object as @seq)
# @raise [ArgumentError] if +marker+ does not occur in the input
def load_sequence(marker)
  input = STDIN.read
  marker_idx = input.index(marker)
  # Fail with a clear message instead of a NoMethodError on nil below.
  raise ArgumentError, "marker #{marker.inspect} not found in input" unless marker_idx

  @seq = input[(marker_idx + marker.size)..-1].delete("\n ").freeze
end

# Where forking is usable, route key_frequency through the fork-based
# wrapper: the in-process implementation stays reachable under the name
# original_key_frequency (which forking_key_frequency calls in the child),
# and key_frequency itself becomes forking_key_frequency.
#
# The 'java' platform keeps the plain in-process key_frequency
# (NOTE(review): presumably because JRuby cannot fork -- confirm). The
# respond_to? check extends the same fallback to any other platform whose
# Process does not implement fork, instead of failing at the first call.
if RUBY_PLATFORM != 'java' && Process.respond_to?(:fork)
  class << self
    # Order matters: capture the original before overwriting the name.
    alias_method :original_key_frequency, :key_frequency
    alias_method :key_frequency, :forking_key_frequency
  end
end

# k-mers to report: every 1-mer and 2-mer (percentages) plus a fixed set of
# longer chains (absolute counts).
singles = %w[a t c g]
doubles = singles.product(singles).map(&:join)
chains  = %w[ggt ggta ggtatt ggtattttaatt ggtattttaatttatagt]

# Load the sequence that follows the marker on STDIN, then count all k-mers
# of the lengths we need.
load_sequence('>THREE Homo sapiens frequency')
find_frequencies(singles + doubles + chains)

# Each percentage table is followed by a blank line; the chain counts end
# with a single newline.
[singles, doubles].each do |group|
  print percentage(group).join("\n") + "\n\n"
end
print count(chains).join("\n") + "\n"
    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
ruby 1.8.7 (2008-08-11 patchlevel 72) [x86_64-linux]


Mon, 27 Nov 2017 12:23:08 GMT

COMMAND LINE:
/usr/bin/ruby knucleotide.mri-7.mri 0 < knucleotide-input250000.txt

PROGRAM FAILED 


PROGRAM OUTPUT:

knucleotide.mri-7.mri:50:in `original_key_frequency'knucleotide.mri-7.mri:50:in `original_key_frequency'knucleotide.mri-7.mri:50:in `original_key_frequency'knucleotide.mri-7.mri:50:in `original_key_frequency'knucleotide.mri-7.mri:50:in `original_key_frequency'knucleotide.mri-7.mri:50:in `original_key_frequency'knucleotide.mri-7.mri:50:in `original_key_frequency': undefined method `byteslice' for #<String:0x7f66f3266d10> (NoMethodError)
	from knucleotide.mri-7.mri:27:in `key_frequency'
	from knucleotide.mri-7.mri:24:in `fork'
	from knucleotide.mri-7.mri:24:in `key_frequency'
	from knucleotide.mri-7.mri:13:in `find_frequencies'
	from knucleotide.mri-7.mri:12:in `initialize'
	from knucleotide.mri-7.mri:12:in `new'
	from knucleotide.mri-7.mri:12:in `find_frequencies'
	from knucleotide.mri-7.mri:11:in `each'
	from knucleotide.mri-7.mri:11:in `find_frequencies'
	from knucleotide.mri-7.mri:96
knucleotide.mri-7.mri:36:in `load': end of file reached (EOFError)
	from knucleotide.mri-7.mri:36:in `join'
	from knucleotide.mri-7.mri:36:in `to_proc'
	from knucleotide.mri-7.mri:17:in `each'
	from knucleotide.mri-7.mri:17:in `find_frequencies'
	from knucleotide.mri-7.mri:96