-- FlowEr - FLOWgram ExtractoR
module Main (main) where

import Bio.Sequence.SFF
import Bio.Sequence.SFF_filters
import Bio.Sequence.Fasta
import Bio.Sequence.FastQ (hWriteSangerQ,hWriteIllumina)
import Bio.Sequence.SeqData (seqdata)
-- import Bio.Util (countIO)

import Print
import Text.Printf

import System.IO (stdout, Handle, openFile, IOMode(..), hClose, hPutStrLn)
-- import System.Environment (getArgs)

import Numeric (showFFloat)
import Data.Char (toLower)
import Data.List (intersperse)
import Data.ByteString.Char8 (unpack,ByteString)
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString as B1
import qualified Data.ByteString.Lazy.Char8 as L
import qualified Data.ByteString.Lazy as L1

import Data.Array.Unboxed
import Data.Array.ST
import Control.Monad.ST
import Control.Monad.State

import Metrics
import qualified Options as O
import Options (Opts)
import Fork

main :: IO ()
main = do
  opts <- O.getArgs
  when (null $ O.inputs opts) $ error "Please provide an input file - or use --help for more information."
  forkAndWait $ buildActions opts

type Action  = IO ()
type Trimmer = ReadBlock -> ReadBlock

buildActions :: Opts -> [Action]
buildActions o = let
    inp = mapM readSFF (O.inputs o)
    tr = map (mkTrimmer o)
    ch (SFF h _) = h
    rs (SFF _ r) = r
    in snd $ flip runState [] $ do
      on (O.info o)      (\h -> mapM_ (hPutStrLn h . getHeader . ch) =<< inp)
      on (O.fasta o)     (\h -> mapM_ (hWriteFasta h . map rbToSequence . tr . rs) =<< inp)
      on (O.fqual o)     (\h -> mapM_ (hWriteQual h . map rbToSequence . tr . rs) =<< inp)
      on (O.text o)      (\h -> mapM_ (hPutStrLn h . dumpText . tr . rs) =<< inp)
      on (O.fastq o)     (\h -> mapM_ (hWriteSangerQ h . map rbToSequence . tr . rs) =<< inp)
      on (O.illumina o)  (\h -> mapM_ (hWriteIllumina h . map rbToSequence . tr . rs) =<< inp)
      on (O.summarize o) (\h -> mapM_ (L1.hPut h . summarize . tr . rs) =<< inp)  -- should we trim?
      on (O.filters o)   (\h -> mapM_ (L1.hPut h . sum_filters . rs) =<< inp)
      on (O.histogram o) (\h -> mapM_ (\(SFF c r) -> hPutStrLn h . showHist . histogram (B.unpack $ flow c) . map flowgram . tr $ r) =<< inp)
      on (O.flowgram o)  (\h -> mapM_ (\(SFF c r) -> L1.hPut h . L1.fromChunks . intersperse (B.pack "\n") . concatMap (showread c) $ r) =<< inp)

on :: Maybe FilePath -> (Handle -> Action) -> State [Action] ()
on Nothing _    = return ()
on (Just f) act = modify $ (:) $ case f of 
  "-" -> act stdout
  _   -> do h <- openFile f WriteMode
            act h
            hClose h

mkTrimmer :: Opts -> Trimmer
mkTrimmer o = case (O.trimKey o, O.trim o) of
        (True,True) -> error "Please specify only one of --trim and --trimkey"
        (True,False) -> \r -> trimFromTo 5 (num_bases $ read_header r) r
        (False,True) -> trim
        (False,False) -> id

-- ------------------------------------------------------------
-- No option - dump as text format
-- ------------------------------------------------------------
dumpText :: [ReadBlock] -> String
dumpText rs = concat . map toText $ rs
  where toText :: ReadBlock -> String
        toText r = concat [ gt, B.unpack (read_name rh), nl
                          , maybe "" ((\s->info++s++nl) . formatRN) $ decodeReadName (read_name rh)
                          , let (lf,rt) = (clip_adapter_right rh, clip_adapter_left rh) 
                            in if lf /= 0 || rt /= 0 then adapter ++ show lf ++ sp++ show rt else ""
                          , clip,     show (clip_qual_left rh), sp, show (clip_qual_right rh), nl
                          , flows,    B.unpack $ B.unwords $ map fi $ flowgram r, nl
                          , idx,      unwords $ map show $ cumulative_index' r, nl
                          , base,     L.unpack (seqdata . rbToSequence $ r), nl
                          , qual,     unwords $ map show $ L1.unpack (quality r), nl
                             ]
          where rh = read_header r
                gt = ">"
                nl = "\n"
                sp = " "
                info     = "  Info: \t"
                clip     = "  Clip: \t"
                adapter  = "  Adap: \t"
                flows    = "  Flows:\t"
                idx      = "  Index:\t"
                base     = "  Bases:\t"
                qual     = "  Quals:\t"
                formatRN (ReadName (yr,mo,dy) (h,m,s) r' x y) = 
                  printf "%4d-%02d-%02d %02d:%02d:%02d R%d (%d,%d)" yr mo dy h m s r' x y

cumulative_index' :: ReadBlock -> [Int]
cumulative_index' = scanl1 (+) . map fromIntegral . B1.unpack . flow_index

-- ------------------------------------------------------------
-- The -i option: Print header info
-- ------------------------------------------------------------
getHeader :: CommonHeader -> String
getHeader h = unlines ["Index:    \t" ++ show (index_offset h,index_length h)
                      ,"Num_reads:\t" ++ show (num_reads h)
                      ,"Num_flows:\t" ++ show (flow_length h)
                      ,"Key:      \t" ++ unpack (key h)
                      ]

-- ----------------------------------------------------------
-- The -s option: Summarize each read on one line
-- ----------------------------------------------------------

-- | Summarize each read on one line of output
summarize :: [ReadBlock] -> L.ByteString
summarize rs = do
  L.concat [ L.pack "# name........\tdate......\ttime....\treg\ttrim_l\ttrim_r\tx_loc\ty_loc\tlen\tK2\ttrimK2\tncount\tavgQ\ttravgQ\n"
           , toLazyByteString . mconcat . map sum1 $ rs]

-- todo: date and time are usually constants!
sum1 :: ReadBlock -> Builder
sum1 r = let rh = read_header r
             nb = num_bases rh
             h = read_name rh
             tr = trim r
             tb, nl, q :: Builder
             tb = char '\t'
             nl = char '\n'
             q  = char '?'
             
             (rndec1,rndec2) = case decodeReadName h of Just rn -> let ((y,m,d),reg,(hh,mm,ss)) = (date rn,region rn,time rn)
                                                                   in ([putDate y m d, putTime hh mm ss, putInt2 reg]
                                                                      ,[putInt (fromIntegral $ x_loc rn), putInt (fromIntegral $ y_loc rn)])
                                                        Nothing -> ([q,q,q],[q,q])
             (qleft,qright) = (clip_qual_left rh, clip_qual_right rh)
             avg_qual qs = let l = fromIntegral (L1.length qs)
                           in if l>0 then putFix 2 $ sum (map fromIntegral $ L1.unpack qs) * 100 `div` l
                             else putFix 2 0
         in mconcat $ intersperse tb ([fromByteString h]
                     ++ rndec1 ++ [putInt (fromIntegral qleft), putInt (fromIntegral qright)] ++ rndec2 
                     ++ [putInt (fromIntegral nb)
                        , fromByteString (fi $ quals $ flowgram r), fromByteString (fi $ quals $ flowgram tr)
                        , putInt (n_count r)
                        , avg_qual $ quality r, avg_qual $ quality tr]) ++ [nl]

-- ----------------------------------------------------------
-- The --filters option, summarize filters
-- ----------------------------------------------------------
sum_filters ::  [ReadBlock] -> L1.ByteString
sum_filters rs = toLazyByteString $ mconcat (header:map sumf1 rs)
  where 
    header = fromByteString $ B.pack "# name..... \tlength \tl_trim \tr_trim \tE K D M L\tSig Q20 Adp\n"
    sumf1 rb = let
      rh = read_header rb
      rn = read_name rh
      nb = fromIntegral $ num_bases rh
      (cl,cr) = (fromIntegral $ clip_qual_left rh, fromIntegral $ clip_qual_right rh)
      dfs = mconcat $ intersperse (char ' ') $
            map (\f -> if f rb then char '+' else char ' ') 
            [discard_empty, discard_key "tcag", discard_dots 0.05, discard_mixed, discard_length 186]
      tfs = mconcat $ intersperse (char ' ') $ map (\f -> putInt3 (f rb))
            [sigint, qual20 10, find_primer rapid_adapter]
      in mconcat (intersperse (char '\t') [fromByteString rn, putInt nb, putInt cl, putInt cr, dfs, tfs]++[char '\n'])

-- ----------------------------------------------------------
-- The -F option: Output the sequence of flows, one flow per line
-- ----------------------------------------------------------

fi :: Flow -> ByteString
fi f | f <= 9999 && f >= 0 = farray!f
     | otherwise = let (i,r) = f `divMod` 100 in B.pack (show i++"."++show r) 
     -- error ("Can't show flow values outside [0..99.99] (You had: "++show f++")")

farray :: Array Flow ByteString
farray = listArray (0,9999) [B.pack (showFFloat (Just 2) i "") | i <- [0,0.01..99.99::Double]]

tab :: ByteString
tab = B.pack "\t"

showread :: CommonHeader -> ReadBlock -> [ByteString]
showread h rd = let rh = read_header rd
                    rn = read_name rh
                    maskFlows = mask rh 1 qgroups . unpack 
                    qgroups = qgroup (B1.unpack $ flow_index rd) (L1.unpack $ quality rd)
                    format p c v q = B.concat [rn,tab,B.pack (show p),tab,B.pack [c],tab,fi v,tab,B.pack (init $ drop 1 $ show q)]
                in zipWith4 format [(1::Int)..] (maskFlows $ flow h) (flowgram rd) qgroups

-- lower case based on the clip_qual values
mask :: ReadHeader -> Int -> [[a]] -> [Char] -> [Char]
mask _ _ _ [] = [] -- qgroups are infinite
mask rh p (q1:qs) (c:cs) = c' : mask rh (p+length q1) qs cs
    where c' = if fromIntegral p < clip_qual_left rh || fromIntegral p > clip_qual_right rh then toLower c else c
mask _ _ _ _ = error "internal error in 'mask'"

zipWith4 :: (a -> b -> c -> d -> e) -> [a] -> [b] -> [c] -> [d] -> [e]
zipWith4 f (a:as) (b:bs) (c:cs) (d:ds) =  f a b c d : zipWith4 f as bs cs ds
zipWith4 _ _ _ _ _ = []

-- | Take the unpacked index_offsets and quality values, and return 
--   a list of groups of quality values, each group corresponding to a flow value. 
--   Flow values < 0.5 result in empty groups.
qgroup :: [Index] -> [Qual] -> [[Qual]]
qgroup [] []       = let rest = []:rest in rest
qgroup is@(1:_) qs = let (iz,irest) = span (==0) (tail is)
                         (q1,qrest) = splitAt (length iz+1) qs
                     in q1 : qgroup irest qrest
qgroup (i:is) qs = [] : qgroup (i-1:is) qs
qgroup _ _ = error "internal error in 'qgroup'"

-- ----------------------------------------------------------
-- The -h option: Output a histogram of flow values
-- ----------------------------------------------------------

type Hist = UArray Flow Int

histogram :: String -> [[Flow]] -> (Hist,Hist,Hist,Hist)
histogram fl scores = runST $ do 
  let zero = newArray (0,9999) 0 :: ST s (STUArray s Flow Int)
  a <- zero
  c <- zero
  g <- zero
  t <- zero
  let ins1 ('A',i) = bump a i
      ins1 ('C',i) = bump c i
      ins1 ('G',i) = bump g i
      ins1 ('T',i) = bump t i
      ins1 (x,_)   = error ("Illegal character "++show x++" in flow!")
      bump ar i = readArray ar i >>= \x -> writeArray ar i (x+1)
  mapM_ ins1 (zip (cycle fl) (map (\x->if x>9999 || x<0 then 9999 else x) $ concat scores))
  a' <- unsafeFreeze a
  c' <- unsafeFreeze c
  g' <- unsafeFreeze g
  t' <- unsafeFreeze t
  return (a',c',g',t')

showHist :: (Hist,Hist,Hist,Hist) -> String
showHist (as,cs,gs,ts) = "Score\tA\tC\tG\tT\tsum\n" ++ 
    unlines [concat $ intersperse "\t" $ showFFloat (Just 2) (fromIntegral sc/100::Double) "" : map show [as!sc,cs!sc,gs!sc,ts!sc, as!sc+cs!sc+gs!sc+ts!sc]
                 | sc <- [0..9999]] 
