source code
--
-- The Computer Language Benchmarks Game
-- http://benchmarksgame.alioth.debian.org/
--
-- regex-dna program contributed by Sergei Matusevich 2007
-- modified by Tim Newsham
-- modified by Stephen Blackheath 2009, v1.0
-- mostly redone by Louis Wasserman, 2010
-- converted from regex-dna program
import Control.Concurrent
import Control.Parallel.Strategies
import Control.Monad
import GHC.Conc
import Foreign
import Text.Regex.PCRE
import Text.Regex.PCRE.ByteString -- requires haskell-regex-pcre-builtin
import qualified Data.ByteString as B
import qualified Data.ByteString.Internal as BI
import Data.Array.Base
import Data.List
import Data.Word
import Data.ByteString.Unsafe
subs = [
("tHa[Nt]", "<4>"),
("aND|caN|Ha[DS]|WaS", "<3>"),
("a[NSt]|BY", "<2>"),
("<[^>]*>", "|"),
("\\|[^|][^|]*\\|", "-")]
main = do
file <- B.getContents
let variants = map (\ x -> (x, makeRegex x)) [
"agggtaaa|tttaccct",
"[cgt]gggtaaa|tttaccc[acg]",
"a[act]ggtaaa|tttacc[agt]t",
"ag[act]gtaaa|tttac[agt]ct",
"agg[act]taaa|ttta[agt]cct",
"aggg[acg]aaa|ttt[cgt]ccct",
"agggt[cgt]aa|tt[acg]accct",
"agggta[cgt]a|t[acg]taccct",
"agggtaa[cgt]|[acg]ttaccct" ] :: [(String, Regex)]
let [s1,s2,s3] = map (B.concat . tail) $
groupBy notHeader $ B.split (BI.c2w '\n') file
showVars :: (String, Regex) -> String
showVars (rx,r) = let m2 = match r s2; m3 = match r s3 :: Int in
m2 `par` m3 `seq` (rx ++ ' ' : show (m2 + m3))
results = map showVars variants ++ [
"",
show $ B.length file,
show $ B.length s1 + B.length s2 + B.length s3]
store <- newEmptyMVar
let chunks = fragment 20000 s2 -- break into chunks to parallelize, which
-- is possible as our regexes are 1 char long
s1 `seq` s2 `seq` s3 `seq` (variants `using` parList (evalTuple2 r0 rseq)) `par`
forkIO (parallel (map substituteAll chunks) >>= putMVar store)
-- do regex substitutions
mapM_ putStrLn (results `using` parList rdeepseq)
chunks' <- takeMVar store
print $ B.length s1 + B.length s3 + chunks'
where notHeader _ s = B.null s || B.head s /= (BI.c2w '>')
-- Drop in replacement for sequence
parallel :: [IO Int] -> IO Int
parallel actions = do
vars <- mapM (\ action -> do
var <- newEmptyMVar
forkIO $ do
answer <- action
putMVar var $! answer
return var) actions
foldM (\ !x v -> liftM (+x) (takeMVar v)) 0 vars
fragment :: Int -> B.ByteString -> [B.ByteString]
fragment chunkSize bs = if B.null bs then [] else
case B.splitAt chunkSize bs of
(start, rem) -> start : fragment chunkSize rem
-- Precompile regexes
subRegexes :: [(Regex, B.ByteString)]
subRegexes = flip map subs $ \(pattern, sub) ->
(makeRegex pattern :: Regex, B.pack (map BI.c2w sub))
extend :: B.ByteString -> IO B.ByteString
extend src = do
destFP <- BI.mallocByteString (B.length src * 3)
copyBS src destFP
copyBS :: B.ByteString -> ForeignPtr Word8 -> IO B.ByteString
copyBS (BI.PS srcFP srcOff srcLen) destFP = withForeignPtr srcFP $ \ src0 ->
withForeignPtr destFP $ \ dest0 -> do
copyArray dest0 (src0 +! srcOff) srcLen
return (BI.PS destFP 0 srcLen)
substituteAll :: B.ByteString -> IO Int
substituteAll !txt@(BI.PS srcFP srcOff srcLen) = allocaArray (B.length txt * 3) $ \ destP -> do
destFP <- newForeignPtr_ destP
withForeignPtr srcFP $ \ srcP -> copyArray destP (srcP `advancePtr` srcOff) srcLen
let dest = BI.PS destFP 0 srcLen
allocaArray (B.length txt * 3) $ \ tmp -> do
tmpF <- newForeignPtr_ tmp
foldM (\ !n sub -> do
n' <- substitute_ tmp (BI.PS destFP 0 n) sub
copyArray destP tmp n'
return n') srcLen subRegexes
(+!) = advancePtr
substitute_ :: Ptr Word8 -> B.ByteString -> (Regex, B.ByteString) -> IO Int
substitute_ !p xs@(BI.PS fp0 i0 l0) (regex, BI.PS fpSub iSub lSub) =
withForeignPtr fp0 $ \ p00 -> let p0 = p00 +! i0 in withForeignPtr fpSub $ \ pSub -> do
len <- do
let go !i !j = do
match <- execute regex (unsafeDrop i xs)
case match of
Right (Just arr) -> do
let !(!off, !len) = arr `unsafeAt` 0
copyArray (p +! j) (p0 +! i) off
copyArray (p +! (j + off)) (pSub +! iSub) lSub
go (i + off + len) (j + off + lSub)
_ -> copyArray (p +! j) (p0 +! i) (l0 - i) >> return (j + l0 - i)
go 0 0
return len -- destFP now points to the substituted string
notes, command-line, and program output
NOTES:
64-bit Ubuntu quad core
The Glorious Glasgow Haskell Compilation System, version 8.4.1
Fri, 23 Mar 2018 19:33:45 GMT
MAKE:
mv regexredux.ghc-2.ghc regexredux.ghc-2.hs
/opt/src/ghc-8.4.1/bin/ghc --make -fllvm -O2 -XBangPatterns -threaded -rtsopts regexredux.ghc-2.hs -o regexredux.ghc-2.ghc_run
[1 of 1] Compiling Main ( regexredux.ghc-2.hs, regexredux.ghc-2.o )
regexredux.ghc-2.hs:57:57: error:
Variable not in scope:
evalTuple2 :: Strategy a0 -> t0 -> Strategy (String, Regex)
|
57 | s1 `seq` s2 `seq` s3 `seq` (variants `using` parList (evalTuple2 r0 rseq)) `par`
| ^^^^^^^^^^
regexredux.ghc-2.hs:57:71: error:
• Variable not in scope: rseq
• Perhaps you meant one of these:
‘seq’ (imported from Prelude), ‘pseq’ (imported from GHC.Conc)
|
57 | s1 `seq` s2 `seq` s3 `seq` (variants `using` parList (evalTuple2 r0 rseq)) `par`
| ^^^^
regexredux.ghc-2.hs:60:43: error:
Variable not in scope: rdeepseq :: Strategy String
|
60 | mapM_ putStrLn (results `using` parList rdeepseq)
| ^^^^^^^^
/home/dunham/benchmarksgame/nanobench/makefiles/u64q.programs.Makefile:340: recipe for target 'regexredux.ghc-2.ghc_run' failed
make: [regexredux.ghc-2.ghc_run] Error 1 (ignored)
rm regexredux.ghc-2.hs
0.49s to complete and log all make actions
COMMAND LINE:
./regexredux.ghc-2.ghc_run +RTS -N4 -H250M -RTS 0 < regexredux-input50000.txt
MAKE ERROR