1 -- Test for sequence functionality
    2 
    3 module Bio.Sequence.Test where
    4 
    5 import Test.QuickCheck
    6 import System.IO.Unsafe
    7 import Data.Maybe (isJust)
    8 
    9 import Bio.Sequence.HashWord
   10 import Bio.Util.TestBase
   11 import Bio.Sequence
   12 
   13 tests_io :: [Test]
   14 --              .........o.........o.........o
   15 tests_io = [ T "serializing"  prop_serialize
   16            , T "serialize qual" prop_serialize_qual
   17            , T "serialize fasta+qual" prop_serialize_fastaQual
   18            , T "serialize fasta+qual multi" prop_serialize_fastaQual2
   19            ]
   20 
   21 prop_serialize (E s) = 
   22     let [s'] = unsafePerformIO (do writeFasta "/tmp/serialize_test" [s]
   23                                    readFasta "/tmp/serialize_test")
   24     in s == s'
   25 
   26 prop_serialize_qual (Eq s@(Seq h d q)) = 
   27     let [(Seq h' d' q')] = unsafePerformIO 
   28                            (do writeQual "/tmp/serialize_qual" [s]
   29                                readQual "/tmp/serialize_qual")
   30     in h == h' && q == q'
   31 
   32 prop_serialize_fastaQual (Eq s) = 
   33     let [s'] = unsafePerformIO 
   34                            (do writeFastaQual "/tmp/serialize_fasta" "/tmp/serialize_qual" [s]
   35                                readFastaQual "/tmp/serialize_fasta" "/tmp/serialize_qual")
   36     in s' == s
   37 
   38 prop_serialize_fastaQual2 :: [ESTq] -> Bool
   39 prop_serialize_fastaQual2 es = 
   40     let ests = map (\(Eq x) -> x) es
   41         ests' = unsafePerformIO 
   42                 (do writeFastaQual "/tmp/serialize_fasta" "/tmp/serialize_qual" ests
   43                     readFastaQual "/tmp/serialize_fasta" "/tmp/serialize_qual")
   44     in ests' == ests
   45 
   46 -- ----------------------------------------------------------
   47 -- Tests for HashWord
   48 
   49 tests_hw :: [Test]
   50 --              .........o.........o.........o
   51 tests_hw = [ T "n2k vs k2n"   prop_n2k_k2n
   52            , T "contigous_0"  prop_contigous_0
   53            , T "prop_rcontig_0" prop_rcontig_0
   54            , T "prop_rcontig_1" prop_rcontig_1
   55            , T "prop_rclast" prop_rclast
   56            ]
   57 
   58 prop_n2k_k2n :: Int -> Bool
   59 prop_n2k_k2n i' = let i = abs i' in (n2k 8 . k2n 8) i == i
   60 
   61 -- check that hashes is equal to hash over all indices
   62 prop_contigous_0 k (E s) = k > 0 ==> hashes (contigous k) (seqdata s) == let indices =  [0..seqlength s-fromIntegral k] 
   63                        in map (\(Just i,j)->(i,j)) $ filter (isJust.fst) $ zipWith (,) (map (hash (contigous k) (seqdata s)) indices) indices
   64 
   65 -- rcontig is the minimum of hash of each forward word and its reverse complement
   66 prop_rcontig_0 k (E s) = k > 0 ==> zipWith min (map fst . hashes (contigous k) . seqdata $ s) (map fst . reverse . hashes (contigous k) . seqdata . revcompl $ s)
   67                            == (map fst $ hashes (rcontig k) (seqdata s))
   68 
   69 -- check that reverse (hashes . reverse) == id
   70 prop_rcontig_1  k (E s) = k > 0 ==> (reverse . map fst . hashes (rcontig k) . seqdata . revcompl $ s) 
   71                             == (map fst . hashes (rcontig k) . seqdata $ s)
   72 
   73 -- remove duplicates, and check key values vs rcontig
   74 prop_rcpacked_1 = undefined
   75 
   76 -- last hash is equal to first hash on revcompl seq.
   77 -- see hashcount below
   78 prop_rclast k (E s) = k > 0 && (not . null . hs $ s) ==> rcl k s
   79     where hs = map fst . hashes (rcontig k) . seqdata
   80 
   81 rcl :: Int -> Sequence -> Bool
   82 rcl k s = ((last . hs $ s) == (head . hs . revcompl $ s))
   83     where hs = map fst . hashes (rcontig k) . seqdata
   84 
   85 -- benchmarks: todo: time hash generation for contig, rcontig, and gapped (when implemented)
   86 bench = [ T "rc hash counts int  (8)" (hashcount_int  8)
   87         , T "rc hash counts int (16)" (hashcount_int 16)
   88         , T "rc hash counts     (16)" (hashcount 16)
   89         , T "rc hash counts     (32)" (hashcount 32)
   90         ]
   91 
   92 hashcount, hashcount_int :: Int -> EST_set -> Property
   93 
   94 hashcount k es' = k > 0 ==> let ESet es = es' 
   95                                 hs :: Sequence -> [Integer]
   96                                 hs = map fst . hashes (rcontig k) . seqdata
   97                             in and $ map (\e -> null (hs e) || rcl k e || error (show k ++"\n" ++ toStr (seqdata e))) es
   98 hashcount_int k es' = k > 0 ==> let ESet es = es' 
   99                                     hs :: Sequence -> [Int]
  100                                     hs = map fst . hashes (rcontig k) . seqdata 
  101                                 in and $ map (\e -> null (hs e) || rcl k e || error (toStr $ seqdata e)) es