The Computer Language
Benchmarks Game

k-nucleotide Rust #7 program

source code

// The Computer Language Benchmarks Game
// http://benchmarksgame.alioth.debian.org/
//
// contributed by the Rust Project Developers
// contributed by Cristi Cobzarenco (@cristicbz)
// contributed by TeXitoi
// Rust #4 modified to use same get_seq as Rust #3
// modified by Tung Duong

#![allow(non_snake_case)]

extern crate rayon;
extern crate ordermap;

use std::sync::Arc;
use std::hash::{Hasher, BuildHasherDefault};
use rayon::prelude::*;
use Item::*;
use ordermap::OrderMap;

/// Minimal hasher for keys that hash themselves through a single
/// `write_u64` call.
///
/// The state is simply the last value written, lightly mixed; the default
/// state is zero.
#[derive(Default)]
struct NaiveHasher(u64);

impl Hasher for NaiveHasher {
    /// Returns the current state unchanged.
    fn finish(&self) -> u64 {
        let NaiveHasher(ref state) = *self;
        *state
    }

    /// Byte-slice input is deliberately unsupported and panics.
    fn write(&mut self, _: &[u8]) {
        unimplemented!()
    }

    /// Replaces the state with `i` XOR-folded with itself shifted right by 7.
    fn write_u64(&mut self, i: u64) {
        let folded = i >> 7;
        self.0 = i ^ folded;
    }
}
/// Builds a fresh `NaiveHasher` (via its `Default` impl) for every hash.
type NaiveBuildHasher = BuildHasherDefault<NaiveHasher>;
/// An `OrderMap` that hashes its keys with `NaiveHasher`.
type NaiveHashMap<K, V> = OrderMap<K, V, NaiveBuildHasher>;
/// Table from a packed `Code` to a `u32` value.
type Map = NaiveHashMap<Code, u32>;

/// A nucleotide sequence packed into a `u64`, two bits per nucleotide.
///
/// The most recently pushed nucleotide occupies the two lowest bits, so a
/// `u64` holds at most 32 nucleotides.
#[derive(Hash, PartialEq, PartialOrd, Ord, Eq, Clone, Copy)]
struct Code(u64);

impl Code {
    /// Shifts the sequence left by one nucleotide, appends the two-bit code
    /// `c`, and keeps only the bits selected by `mask`.
    fn push(&mut self, c: u8, mask: u64) {
        self.0 <<= 2;
        self.0 |= c as u64;
        self.0 &= mask;
    }

    /// Packs the nucleotide letters of `s` into a `Code`, first letter in
    /// the most significant position.
    fn from_str(s: &str) -> Code {
        let mask = Code::make_mask(s.len());
        s.bytes().fold(Code(0), |mut packed, letter| {
            packed.push(Code::encode(letter), mask);
            packed
        })
    }

    /// Unpacks the lowest `frame` nucleotides into an upper-case string.
    fn to_string(&self, frame: usize) -> String {
        // Inverse of `encode`: A -> 0, C -> 1, T -> 2, G -> 3.
        const DECODE: [u8; 4] = [b'A', b'C', b'T', b'G'];

        let mut letters = vec![0u8; frame];
        let mut code = self.0;
        // The last nucleotide sits in the lowest bits, so fill back to front.
        for slot in letters.iter_mut().rev() {
            *slot = DECODE[(code & 0b11) as usize];
            code >>= 2;
        }
        String::from_utf8(letters).expect("decoded nucleotides are ASCII")
    }

    /// Returns a mask with the lowest `2 * frame` bits set.
    ///
    /// Frames of 32 or more nucleotides fill the whole `u64`, so the mask
    /// saturates to all ones. The plain `1 << (2 * frame)` would overflow
    /// the shift there (a panic in debug builds, a zero mask otherwise).
    fn make_mask(frame: usize) -> u64 {
        if frame >= 32 {
            !0u64
        } else {
            (1u64 << (2 * frame)) - 1
        }
    }

    /// Maps an ASCII nucleotide letter to its two-bit code using bits 1 and 2
    /// of the byte; the case bit is ignored, so `a`/`A` encode the same.
    fn encode(c: u8) -> u8 {
        (c & 0b110) >> 1
    }
}

/// Sliding-window iterator that yields one `Code` per input byte once the
/// window has been primed.
///
/// Input bytes are pushed into the code as-is (no `Code::encode` call is
/// made here).
struct Iter<'a> {
    iter: std::slice::Iter<'a, u8>,
    code: Code,
    mask: u64,
}

impl<'a> Iter<'a> {
    /// Creates the iterator and pre-loads the first `frame - 1` bytes, so
    /// that the first call to `next` already yields a full frame.
    fn new(input: &[u8], frame: usize) -> Iter {
        let mask = Code::make_mask(frame);
        let mut remaining = input.iter();
        let primed = remaining
            .by_ref()
            .take(frame - 1)
            .fold(Code(0), |mut window, &byte| {
                window.push(byte, mask);
                window
            });
        Iter {
            iter: remaining,
            code: primed,
            mask,
        }
    }
}

impl<'a> Iterator for Iter<'a> {
    type Item = Code;

    /// Slides the window forward by one byte and returns the new code, or
    /// `None` once the input is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        let &byte = self.iter.next()?;
        self.code.push(byte, self.mask);
        Some(self.code)
    }
}

/// Counts how often each `frame`-long code produced by `Iter` occurs in
/// `input`.
fn gen_freq(input: &[u8], frame: usize) -> Map {
    let mut counts = Map::default();
    Iter::new(input, frame).for_each(|kmer| {
        let slot = counts.entry(kmer).or_insert(0);
        *slot += 1;
    });
    counts
}

/// One section of the report.
#[derive(Clone, Copy)]
enum Item {
    /// Relative frequency of every code of the given frame length.
    Freq(usize),
    /// Occurrence count of one specific sequence.
    Occ(&'static str),
}

impl Item {
    /// Prints this section to stdout using the counts in `freq`.
    ///
    /// * `Freq(frame)` prints one `SEQ PERCENT` line per entry (three
    ///   decimals), highest count first, followed by a blank line.
    /// * `Occ(seq)` prints `COUNT<TAB>SEQ`, with a count of 0 when the
    ///   sequence is not in the table.
    fn print(&self, freq: &Map) {
        match *self {
            Freq(frame) => {
                let mut ranked: Vec<_> =
                    freq.iter().map(|(&code, &count)| (count, code)).collect();
                // Descending by count, ties broken by descending code. Map
                // keys are unique, so no two tuples compare equal and an
                // unstable sort gives a deterministic order.
                ranked.sort_unstable_by(|a, b| b.cmp(a));
                let total = ranked.iter().map(|&(count, _)| count).sum::<u32>() as f32;
                for &(count, key) in &ranked {
                    println!("{} {:.3}", key.to_string(frame), (count as f32 * 100.) / total);
                }
                println!();
            }
            Occ(occ) => {
                // Encode the key once and do a single lookup.
                let count = freq.get(&Code::from_str(occ)).cloned().unwrap_or(0);
                println!("{}\t{}", count, occ);
            }
        }
    }

    /// Builds the count table this section needs from `input`: the frame
    /// length is the `Freq` payload, or the length of the `Occ` sequence.
    fn gen_freq(&self, input: &[u8]) -> Map {
        let frame = match *self {
            Freq(frame) => frame,
            Occ(occ) => occ.len(),
        };
        gen_freq(input, frame)
    }
}

/// Reads `r` line by line, skips everything up to and including the first
/// line that starts with `key`, and returns the remaining lines trimmed,
/// concatenated, and passed byte-by-byte through `Code::encode`.
///
/// Panics if reading a line fails.
fn get_seq<R: std::io::BufRead>(r: R, key: &str) -> Vec<u8> {
    let body = r
        .lines()
        .map(|line| line.unwrap())
        .skip_while(|line| !line.starts_with(key))
        .skip(1);

    let mut encoded = Vec::new();
    for line in body {
        encoded.extend(line.trim().bytes().map(Code::encode));
    }
    encoded
}

/// Reads the `>THREE` section from stdin, builds the count table for every
/// report item in parallel, and prints the sections in their listed order.
fn main() {
    let tasks = vec![
        Freq(1),
        Freq(2),
        Occ("GGT"),
        Occ("GGTA"),
        Occ("GGTATT"),
        Occ("GGTATTTTAATT"),
        Occ("GGTATTTTAATTTATAGT"),
    ];

    let stdin = std::io::stdin();
    let input = Arc::new(get_seq(stdin.lock(), ">THREE"));

    // Walk the list back to front so the big tasks are spawned first.
    let results: Vec<_> = tasks
        .into_par_iter()
        .rev()
        .map(|item| {
            let shared = Arc::clone(&input);
            let freq = item.gen_freq(&shared);
            (item, freq)
        })
        .collect();

    // The results are in reversed order; flip them back for printing.
    for (item, freq) in results.into_iter().rev() {
        item.print(&freq);
    }
}
    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
rustc 1.25.0 (84203cac6 2018-03-25)


Thu, 29 Mar 2018 16:56:04 GMT

MAKE:
/opt/src/rust-1.25.0/bin/rustc -C opt-level=3 -C target-cpu=core2 -C lto -C codegen-units=1 -L /opt/src/rust-libs knucleotide.rs -o knucleotide.rust-7.rust_run
error[E0460]: found possibly newer version of crate `lazy_static` which `rayon` depends on
  --> knucleotide.rs:12:1
   |
12 | extern crate rayon;
   | ^^^^^^^^^^^^^^^^^^^
   |
   = note: perhaps that crate needs to be recompiled?
   = note: the following crate versions were found:
           crate `lazy_static`: /opt/src/rust-1.25.0/lib/rustlib/x86_64-unknown-linux-gnu/lib/liblazy_static-c2718c97cbc91f7c.rlib
           crate `lazy_static`: /opt/src/rust-libs/liblazy_static-cc3614442e8d4ac5.rlib
           crate `rayon`: /opt/src/rust-libs/librayon-3db3bc39e5457432.rlib

error: aborting due to previous error

/home/dunham/benchmarksgame/nanobench/makefiles/u64q.programs.Makefile:632: recipe for target 'knucleotide.rust-7.rust_run' failed
make: [knucleotide.rust-7.rust_run] Error 101 (ignored)

0.20s to complete and log all make actions

COMMAND LINE:
./knucleotide.rust-7.rust_run 0 < knucleotide-input250000.txt

MAKE ERROR