1 {- |
    2    Implement alignments\/edit distance with affine gap penalties
    3 
    4    I've seen g = (-10,-1) as the suggested price to pay for a gaps
    5    using BLOSUM62.  Good choice as any, I guess.
    6 -}
    7 
    8 module Bio.Alignment.AAlign ( 
    9    -- * Smith-Waterman, or locally optimal alignment with affine gaps
   10    local_score, local_align
   11 
   12    -- * Needleman-Wunsch, or globally optimal alignment with affine gaps
   13    , global_score, global_align
   14 
   15    ) where
   16 
   17 import Data.List (partition,maximumBy)
   18 import Bio.Sequence.SeqData
   19 import Bio.Alignment.AlignData
   20 
   21 -- | Minus infinity (or an approximation thereof)
   22 minf :: Num a => a
   23 minf = -100000000
   24 
   25 -- ------------------------------------------------------------
   26 -- Edit distances
   27 
   28 -- | Calculate global edit distance (Needleman-Wunsch alignment score)
   29 global_score :: (Num a, Ord a) => SubstMx a -> (a,a) -> Sequence -> Sequence -> a
   30 global_score mx g s1 s2 = uncurry max . last . last 
   31                           $ columns (score_select minf mx g) (0,fst g) s1 s2
   32 
   33 -- | Calculate local edit distance (Smith-Waterman alignment score)
   34 local_score :: (Num a, Ord a) => SubstMx a -> (a,a) -> Sequence -> Sequence -> a
   35 local_score mx g s1 s2 = maximum . map (uncurry max) . concat 
   36                          $ columns (score_select 0 mx g) (0,fst g) s1 s2
   37 
   38 -- | Generic scoring and selection function for global and local scoring
   39 score_select :: (Num a,Ord a) => a -> SubstMx a -> (a,a) -> Selector (a,a)
   40 score_select minf mx (go,ge) cds = 
   41     let (reps,ids) = partition (isRepl.snd) cds 
   42         s = maximum $ minf:[max sub gap +mx (x,y) | ((sub,gap),Repl x y) <- reps]
   43         g = maximum $ minf:[max (sub+go) (gap+ge) | ((sub,gap),_) <- ids]
   44     in (s,g)
   45 
   46 -- ------------------------------------------------------------
   47 -- Alignments
   48 
   49 -- maximum and addition for compound values
   50 max' (x,ax) (y,yx) = if x>=y then (x,ax) else (y,yx)
   51 fp (x,ax) (s,e) = (x+s,e:ax)
   52 
   53 -- | Calculate global alignment (Needleman-Wunsch)
   54 global_align :: (Num a, Ord a) => SubstMx a -> (a,a) -> Sequence -> Sequence -> (a,EditList)
   55 global_align mx g s1 s2 = revsnd . uncurry max' . last . last 
   56                $ columns (align_select minf mx g) ((0,[]),(fst g,[])) s1 s2
   57 
   58 -- | Calculate local alignmnet (Smith-Waterman)
   59 local_align :: (Num a, Ord a) => SubstMx a -> (a,a) -> Sequence -> Sequence -> (a,EditList)
   60 local_align mx g s1 s2 = revsnd . maximumBy (compare `on` fst)
   61                          . map (uncurry max') . concat
   62                          $ columns (align_select 0 mx g) ((0,[]),(fst g,[])) s1 s2
   63 
   64 revsnd (s,a) = (s,reverse a)
   65 
   66 -- | Generic scoring and selection for global and local alignment
   67 align_select :: (Num a, Ord a) => a -> SubstMx a -> (a,a) -> Selector ((a,EditList),(a,EditList))
   68 align_select minf mx (go,ge) cds = 
   69     let (reps,ids) = partition (isRepl.snd) cds 
   70         s = maximumBy (compare `on` fst) 
   71           $ (minf,[]):[max' sub gap `fp` (mx (x,y),e) | ((sub,gap),e@(Repl x y)) <- reps]
   72         g = maximumBy (compare `on` fst) 
   73           $ (minf,[]):[max' (sub `fp` (go,e)) (gap `fp` (ge,e)) | ((sub,gap),e) <- ids]
   74     in (s,g)