1 {- | 2 Implement alignments\/edit distance with affine gap penalties 3 4 I've seen g = (-10,-1) as the suggested price to pay for a gaps 5 using BLOSUM62. Good choice as any, I guess. 6 -} 7 8 module Bio.Alignment.AAlign ( 9 -- * Smith-Waterman, or locally optimal alignment with affine gaps 10 local_score, local_align 11 12 -- * Needleman-Wunsch, or globally optimal alignment with affine gaps 13 , global_score, global_align 14 15 ) where 16 17 import Data.List (partition,maximumBy) 18 import Bio.Sequence.SeqData 19 import Bio.Alignment.AlignData 20 21 -- | Minus infinity (or an approximation thereof) 22 minf :: Num a => a 23 minf = -100000000 24 25 -- ------------------------------------------------------------ 26 -- Edit distances 27 28 -- | Calculate global edit distance (Needleman-Wunsch alignment score) 29 global_score :: (Num a, Ord a) => SubstMx a -> (a,a) -> Sequence -> Sequence -> a 30 global_score mx g s1 s2 = uncurry max . last . last 31 $ columns (score_select minf mx g) (0,fst g) s1 s2 32 33 -- | Calculate local edit distance (Smith-Waterman alignment score) 34 local_score :: (Num a, Ord a) => SubstMx a -> (a,a) -> Sequence -> Sequence -> a 35 local_score mx g s1 s2 = maximum . map (uncurry max) . concat 36 $ columns (score_select 0 mx g) (0,fst g) s1 s2 37 38 -- | Generic scoring and selection function for global and local scoring 39 score_select :: (Num a,Ord a) => a -> SubstMx a -> (a,a) -> Selector (a,a) 40 score_select minf mx (go,ge) cds = 41 let (reps,ids) = partition (isRepl.snd) cds 42 s = maximum $ minf:[max sub gap +mx (x,y) | ((sub,gap),Repl x y) <- reps] 43 g = maximum $ minf:[max (sub+go) (gap+ge) | ((sub,gap),_) <- ids] 44 in (s,g) 45 46 -- ------------------------------------------------------------ 47 -- Alignments 48 49 -- maximum and addition for compound values 50 max' (x,ax) (y,yx) = if x>=y then (x,ax) else (y,yx) 51 fp (x,ax) (s,e) = (x+s,e:ax) 52 53 -- | Calculate global alignment (Needleman-Wunsch) 54 global_align :: (Num a, Ord a) => SubstMx a -> (a,a) -> Sequence -> Sequence -> (a,EditList) 55 global_align mx g s1 s2 = revsnd . uncurry max' . last . last 56 $ columns (align_select minf mx g) ((0,[]),(fst g,[])) s1 s2 57 58 -- | Calculate local alignmnet (Smith-Waterman) 59 local_align :: (Num a, Ord a) => SubstMx a -> (a,a) -> Sequence -> Sequence -> (a,EditList) 60 local_align mx g s1 s2 = revsnd . maximumBy (compare `on` fst) 61 . map (uncurry max') . concat 62 $ columns (align_select 0 mx g) ((0,[]),(fst g,[])) s1 s2 63 64 revsnd (s,a) = (s,reverse a) 65 66 -- | Generic scoring and selection for global and local alignment 67 align_select :: (Num a, Ord a) => a -> SubstMx a -> (a,a) -> Selector ((a,EditList),(a,EditList)) 68 align_select minf mx (go,ge) cds = 69 let (reps,ids) = partition (isRepl.snd) cds 70 s = maximumBy (compare `on` fst) 71 $ (minf,[]):[max' sub gap `fp` (mx (x,y),e) | ((sub,gap),e@(Repl x y)) <- reps] 72 g = maximumBy (compare `on` fst) 73 $ (minf,[]):[max' (sub `fp` (go,e)) (gap `fp` (ge,e)) | ((sub,gap),e) <- ids] 74 in (s,g)