{- Implement Sanger-type sequencing.

   Random primer position.
   Terminate after roughly 850±50 nucleotides(?).
   Error model: initial errors, uniform errors, terminal errors.
-}

module Sanger (sanger) where
import UnfoldMut

-- Sanger error profile, used by the substitution/insert/delete mutators.
-- Three weighted components are merged with 'combine':
--   * 0.05 x uniform        -- flat term for the high-quality stretch
--   * 0.30 x gradient 25 0  -- high error rate at the start of the read
--   * 0.40 x sigma 700 25   -- degradation towards the end of the read
-- NOTE(review): the previous comment described the uniform term as a
-- "0.5% error rate", but the weight applied here is 0.05; confirm which
-- figure is intended.
sanger_dist :: Distribution
sanger_dist = combine
  [ scaled 0.05 uniform
  , scaled 0.30 (gradient 25 0)
  , scaled 0.40 (sigma 700 25)
  ]
  where
    -- Multiply the output of a distribution by a constant weight.
    scaled weight dist = (* weight) . dist

-- | Alphabet handed to the 'subst' and 'ins' mutators in 's_mutate':
-- the four nucleotide letters plus @N@.
mods :: String
mods = "ACGTN"

-- | Mutators of the Sanger model.
--
-- The first three follow 'sanger_dist' and keep substitutions and indels
-- in a 3:1 ratio (weights 0.60 : 0.10 + 0.10).  The last two follow
-- @sigma 700 25@ only and add extra duplications and deletions near the
-- end of the read; they are not part of the 3:1 ratio.
s_mutate :: [Mutator]
s_mutate = concat
  [ mkmut (weighted 0.60 sanger_dist)    (subst mods) -- 3/4 substitutions
  , mkmut (weighted 0.10 sanger_dist)    (ins mods)   -- 1/8 insertions
  , mkmut (weighted 0.10 sanger_dist)    del          -- 1/8 deletions
  , mkmut (weighted 0.30 (sigma 700 25)) dup          -- terminal duplications
  , mkmut (weighted 0.20 (sigma 700 25)) del          -- terminal deletions
  ]
  where
    -- Multiply the output of a distribution by a constant weight.
    weighted w dist = (* w) . dist

-- | Termination function of the model: takes the third field of the 'MS'
-- state (presumably the current read length -- verify against UnfoldMut),
-- converts it with 'fromIntegral' and evaluates @sigma 850 25@ on it.
-- NOTE(review): the file header mentions 850±50 nucleotides while 25 is
-- passed to 'sigma' here; confirm what sigma's second argument means.
terminator :: Terminator
terminator (MS _ _ len) = sigma 850 25 (fromIntegral len)

-- | Build the Sanger sequencing model from its textual parameters.
--
-- Exactly two parameters are required.  They are passed on unchanged, as a
-- two-element list, to 'p_uniform'; the resulting component is bundled with
-- the Sanger mutators ('s_mutate') and the read 'terminator'.
--
-- Any other number of parameters is a usage error and is reported with an
-- explicit message instead of an anonymous pattern-match failure.
sanger :: [String] -> Model
sanger params@[_, _] = (p_uniform params, s_mutate, terminator)
sanger _ =
  error "Sanger.sanger: expected exactly two parameters (passed on to p_uniform)"
