The Computer Language
Benchmarks Game

regex-redux Dart #2 program

source code

/* The Computer Language Benchmarks Game
   http://benchmarksgame.alioth.debian.org/

   regex-dna program contributed by Jos Hirth, based on the JavaScript version
     which was created by Jesse Millikan, jose fco. gonzalez, and Matthew Wilson

   converted from regex-dna program
*/

import 'dart:io';
import 'dart:convert';

/// Entry point: collects every line of standard input into one string and,
/// once the input stream is done, passes that string to [regexAllTheThings].
void main() {
  var buffer = new StringBuffer();

  // Stores one decoded line, followed by a '\n' terminator.
  void collect(line) {
    if (line == null) return;
    buffer.writeln(line);
  }

  // Invoked when stdin is exhausted; runs the workload on the gathered text.
  void finish() {
    regexAllTheThings(buffer.toString());
  }

  stdin
      .transform(UTF8.decoder)
      .transform(new LineSplitter())
      .listen(collect, onDone: finish);
}

/// Runs the regex workload over [fullText] and prints the results.
///
/// Output, in order:
///  * one line per variant pattern: the pattern source, a space, and the
///    number of matches found in the stripped text;
///  * a single print of a blank line followed by three lengths, one per
///    line: the raw input, the text after lines beginning with '>' and all
///    newlines have been removed, and the text after every substitution
///    rule has been applied.
void regexAllTheThings(String fullText) {
  // Variant patterns to count; each is compiled case-insensitively.
  var variantSources = [
    'agggtaaa|tttaccct',
    '[cgt]gggtaaa|tttaccc[acg]',
    'a[act]ggtaaa|tttacc[agt]t',
    'ag[act]gtaaa|tttac[agt]ct',
    'agg[act]taaa|ttta[agt]cct',
    'aggg[acg]aaa|ttt[cgt]ccct',
    'agggt[cgt]aa|tt[acg]accct',
    'agggta[cgt]a|t[acg]taccct',
    'agggtaa[cgt]|[acg]ttaccct'
  ];
  var variants = variantSources
      .map((source) => new RegExp(source, caseSensitive: false))
      .toList();

  // Flat list of (pattern, replacement) pairs, applied in this order.
  var substitutions = [
    'tHa[Nt]', '<4>',
    'aND|caN|Ha[DS]|WaS', '<3>',
    'a[NSt]|BY', '<2>',
    '<[^>]*>', '|',
    '\\|[^|][^|]*\\|', '-'
  ];

  var rawLength = fullText.length;

  // Remove whole lines that start with '>' and every remaining newline.
  // (Original author's note: still ridiculously slow with r21658.)
  var stripper = new RegExp('^>.*\n|\n', multiLine: true);
  fullText = fullText.replaceAll(stripper, '');
  var strippedLength = fullText.length;

  for (var variant in variants) {
    var hits = variant.allMatches(fullText).length;
    print('${variant.pattern} $hits');
  }

  // Walk the flat list two entries at a time: pattern, then replacement.
  for (var k = 0; k + 1 < substitutions.length; k += 2) {
    var from = new RegExp(substitutions[k]);
    fullText = fullText.replaceAll(from, substitutions[k + 1]);
  }

  var finalLength = fullText.length;
  print('\n$rawLength\n$strippedLength\n$finalLength');
}
    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
Dart VM version: 1.24.2 (Thu Jun 22 08:43:26 2017) on "linux_x64"


Thu, 26 Oct 2017 21:32:57 GMT

MAKE:
make: 'regexredux.dart-2.dart_run' is up to date.

0.01s to complete and log all make actions

COMMAND LINE:
/opt/src/dartsdk-linux-x64-release/dart-sdk/bin/dart  regexredux.dart-2.dart 0 < regexredux-input5000000.txt

PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178

50833411
50000000
27388361