1 -- Tests for alignments 2 3 module Bio.Alignment.Test where 4 import Bio.Sequence 5 import Bio.Util.TestBase 6 import Bio.Alignment.AlignData (showalign) 7 import Bio.Alignment.Matrices as M 8 9 import Test.QuickCheck 10 import Bio.Alignment.QAlign as Q 11 import Bio.Alignment.AAlign as A 12 13 import Bio.Alignment.AlignData (toStrings, extractGaps, insertGaps) 14 import Bio.Alignment.Multiple 15 16 import Data.List (intersperse,sort,nub) 17 import qualified Data.ByteString.Lazy as B 18 import qualified Data.ByteString.Lazy.Char8 as BC 19 20 -- default blastn parameters 21 mx = Q.qualMx 22 amx = M.blastn_default 23 g = (-5,-2) 24 25 tests :: [Test] 26 -- .........o.........o.........o 27 tests = [ T "gapped assembly invariant" prop_asm_gaps 28 29 , T "global reverse" (prop_reverse_g mx g) 30 , T "global reverse w/qual" (prop_reverse_g_qual mx g) 31 , T "local reverse" (prop_reverse_l mx g) 32 , T "local reverse w/qual" (prop_reverse_l_qual mx g) 33 , T "global recovery" (prop_recover_g mx g) 34 35 , T "global score <= local" (prop_global_local mx g) 36 , T "global score <= overlap" (prop_global_overlap mx g) 37 , T "overlap score <= local" (prop_overlap_local mx g) 38 39 , T "overlap score = align" (prop_overlap_score_align mx g) 40 41 , T "s == s => equal scores" (prop_equal_scores mx g) 42 43 -- should give same result without n's(?) 44 , T "global aalign = qualign" prop_AAlign_QAlign_g 45 , T "local aalign = qualign" prop_AAlign_QAlign_l 46 47 -- are these really correct? 48 , T "decr qual => decr score (l)" (prop_quality_dec_l mx (-100,-100)) 49 , T "decr qual => decr score (g)" (prop_quality_dec_g mx (-100,-100)) 50 51 , T "indirect recover (g)" prop_indirect_recover 52 ] 53 54 -- pretty ugly, to ensure the gaps are correctly generated 55 prop_asm_gaps :: (String,[Int]) -> Bool 56 prop_asm_gaps (s,gs) = let (str,gaps) = (BC.pack $ filter (/='-') s 57 , nub $ takeWhile (<= BC.length str) $ dropWhile (<0) $ map fromIntegral $ sort gs) 58 in (str,gaps) == (extractGaps . insertGaps '*' $ (str,gaps)) 59 60 -- these aren't entirely equivalent, as the different column generators or selector 61 -- sometimes make different, but equally scoring, choices. Should be fixed, but in 62 -- the meantime, checking scores to within an epsilot should be okay. 63 -- (May fail if two n's are aligned?) 64 prop_AAlign_QAlign_g (E s1) (E s2) = 65 let qmx = qualMx 22 22 66 a = (A.global_align qmx g s1 s2) 67 q = (Q.global_align mx g s1 s2) 68 in if abs (fst a - fst q) < 0.001 then True 69 else error ("\n"++show (fst a)++"\n"++ showalign (snd a) 70 ++"\n"++show (fst q) ++ "\n"++ showalign (snd q)) 71 72 prop_AAlign_QAlign_l (E s1) (E s2) = 73 let qmx = qualMx 22 22 74 a = (A.local_align qmx g s1 s2) 75 q = (Q.local_align mx g s1 s2) 76 in if abs (fst a - fst q) < 0.001 then True 77 else error ("\n"++show (fst a)++"\n"++ showalign (snd a) 78 ++"\n"++show (fst q) ++ "\n"++ showalign (snd q)) 79 80 -- Check reverse without quality values 81 prop_reverse_g mx g (E s1) (E s2) = 82 abs (Q.global_score mx g s1 s2 83 - Q.global_score mx g (revcompl s1) (revcompl s2)) < 0.1 84 85 prop_reverse_l mx g (E s1) (E s2) = 86 abs (Q.local_score mx g s1 s2 87 - Q.local_score mx g (revcompl s1) (revcompl s2)) < 0.1 88 89 -- Check reverse with quality 90 prop_reverse_g_qual mx g (Eq s1) (Eq s2) = 91 abs (Q.global_score mx g s1 s2 92 - Q.global_score mx g (revcompl s1) (revcompl s2)) < 0.1 93 94 prop_reverse_l_qual mx g (Eq s1) (Eq s2) = 95 abs (Q.local_score mx g s1 s2 96 - Q.local_score mx g (revcompl s1) (revcompl s2)) < 0.1 97 98 -- Check that alignments are produced from the correct sequences 99 prop_recover_g mx g (Eq s1) (Eq s2) = 100 let (s1',s2') = toStrings $ snd $ Q.global_align mx g s1 s2 101 in filter (/='-') s1' == toStr (seqdata s1) 102 && filter (/='-') s2' == toStr (seqdata s2) 103 104 -- global <= overlap <= local 105 -- Global score never exceeds optimal local score 106 prop_global_local mx g (Eq s1) (Eq s2) = 107 Q.global_score mx g s1 s2 <= Q.local_score mx g s1 s2 108 109 -- Global score never exceeds optimal overlap score 110 prop_global_overlap mx g (Eq s1) (Eq s2) = 111 Q.global_score mx g s1 s2 <= Q.overlap_score mx g s1 s2 112 113 -- Overlap score never exceeds optimal local score 114 prop_overlap_local mx g (Eq s1) (Eq s2) = 115 Q.overlap_score mx g s1 s2 <= Q.local_score mx g s1 s2 116 117 prop_overlap_score_align mx g (Eq s1) (Eq s2) = 118 abs (Q.overlap_score mx g s1 s2 - fst (Q.overlap_align mx g s1 s2)) < 0.1 119 120 prop_equal_scores mx g (Eq s1) = 121 let qg = Q.global_score mx g s1 s1 122 ql = Q.local_score mx g s1 s1 123 qo = Q.overlap_score mx g s1 s1 124 in qg == ql && ql == qo 125 126 -- Sinking score with sinking quality - local (same as global) 127 prop_quality_dec_l mx g (E (Seq h d _)) = let 128 q10 = Just $ B.map (const 10) d 129 q20 = Just $ B.map (const 20) d 130 in Q.local_score mx g (Seq h d q10) (Seq h d q10) 131 <= Q.local_score mx g (Seq h d q20) (Seq h d q20) 132 133 -- Sinking score with sinking quality - global 134 -- NB: score is always positive (aligning seq. with itself) 135 prop_quality_dec_g mx g (E (Seq h d _)) = let 136 q10 = Just $ B.map (const 10) d 137 q20 = Just $ B.map (const 20) d 138 in Q.global_score mx g (Seq h d q10) (Seq h d q10) 139 <= Q.global_score mx g (Seq h d q20) (Seq h d q20) 140 141 -- Sinking avg score with sinking quality 142 -- This is not correct, low quality reduces the penalty for mismatch 143 -- much harder than the reward for a match. 144 -- (E.g. mmmxmmm may score higher with poor quality) 145 prop_quality_dec2 mx g (E (Seq h d _)) (E (Seq h' d' _)) = let 146 q10 = Just $ B.map (const 10) d 147 q20 = Just $ B.map (const 20) d 148 (s1,a1) = Q.local_align mx g (Seq h d q10) (Seq h' d' q10) 149 (s2,a2) = Q.local_align mx g (Seq h d q20) (Seq h' d' q20) 150 in if null a1 && null a2 then True 151 else s1 / fromIntegral (length a1) <= s2 / fromIntegral (length a2) 152 153 -- not expected to pass(?), but useful to debug indirect alignments 154 prop_indirect (E s1) (E s2) (E s3) = let a1 = snd $ A.global_align amx g s1 s2 155 a2 = snd $ A.global_align amx g s2 s3 156 a3 = snd $ A.global_align amx g s1 s3 157 i1 = indirect a1 a2 158 in if a3 == i1 then True 159 else error ("\n"++unlines [showalign a1, showalign a2,"",showalign a3,"",showalign i1]) 160 161 prop_indirect_recover (E s1) (E s2) (E s3) = 162 let a1 = snd $ A.global_align amx g s1 s2 163 a2 = snd $ A.global_align amx g s2 s3 164 i1 = indirect a1 a2 165 (s1',s3') = toStrings i1 166 f = filter (/= '-') 167 in if f s1' == toStr (seqdata s1) && f s3' == toStr (seqdata s3) 168 then True else error ("\n"++(unlines [showalign a1, showalign a2,"",showalign i1])) 169 170 {- 171 -- | Affine = Simple when go == ge 172 prop_affine1 s1 s2 = S.global_align mx g s1 s2 == A.global_align mx (g,g) s1 s2 173 prop_affine2 s1 s2 = S.global_score mx g s1 s2 == A.global_score mx (g,g) s1 s2 174 -} 175 176 {- 177 -- | For a given scoring scheme, scoring must correspond to alignments 178 prop_score1 mx g (Eq s1) (Eq s2) = 179 sum . S.eval mx g . snd . S.global_align mx g s1 $ s2 180 == S.global_score mx g s1 s2 181 -- -> todo: ditto for A and Q, ditto for local 182 -} 183 184 {- 185 -- | Any global alignment should return the exact same sequences 186 prop_recover mx g s1 s2 = undefined 187 188 -- | Qual = Affine when no quality data are supplied 189 prop_qual s1 s2 = undefined 190 191 prop_local s1 s2 = S.global_score mx g s1 s2 <= S.local_score mx g s1 s2 192 -- -> todo: ditto for A and Q 193 194 -- | Verify that all columns have the same lenght (ie. it should be a square matrix) 195 -- (Check vs sequence lengths) Both QAlign.columns and AlignData.columns 196 prop_columns = undefined 197 -} 198 199 200 bench :: [Test] 201 -- .........o.........o.........o 202 bench = [ T "local score, short" bench_local_rev_s 203 , T "overlap score, short" bench_overlap_rev_s 204 , T "global score, short" bench_global_rev_s 205 206 , T "local score, long" bench_local_rev_l 207 , T "overlap score, long" bench_overlap_rev_l 208 , T "global score, long" bench_global_rev_l 209 ] 210 211 bench_global_rev_s (ES x) (ES y) = 212 abs (Q.global_score mx g x y 213 - Q.global_score mx g (revcompl x) (revcompl y)) < 0.1 214 215 bench_local_rev_s (ES x) (ES y) = 216 abs (Q.local_score mx g x y 217 - Q.local_score mx g (revcompl x) (revcompl y)) < 0.1 218 219 bench_overlap_rev_s (ES x'@(Seq h d _)) (ES y'@(Seq h2 d2 _)) = 220 let x = Seq h d Nothing 221 y = Seq h2 d2 Nothing 222 f = Q.overlap_align mx g x y 223 r = Q.overlap_align mx g (revcompl x) (revcompl y) 224 in if abs (fst f-fst r) < 0.1 then True 225 else error ("\nFwd: "++show (fst f)++"\n"++ showalign (snd f) 226 ++"\nRev: "++show (fst r)++"\n"++ showalign (snd r) 227 ++"\n"++toStr (seqdata x)++"\n"++(toStr $ seqdata y)) 228 229 bench_overlap_rev_l (EL x) (EL y) = 230 abs (Q.overlap_score mx g x y 231 - Q.overlap_score mx g (revcompl x) (revcompl y)) < 0.1 232 233 bench_global_rev_l (EL x) (EL y) = 234 abs (Q.global_score mx g x y 235 - Q.global_score mx g (revcompl x) (revcompl y)) < 0.1 236 237 bench_local_rev_l (EL x) (EL y) = 238 abs (Q.local_score mx g x y 239 - Q.local_score mx g (revcompl x) (revcompl y)) < 0.1