{-# LANGUAGE DeriveDataTypeable #-}

module Main where

import Data.List (foldl', intercalate)
import Data.Maybe (fromMaybe)
import Text.Printf
import System.Console.CmdArgs
import Control.Monad (when)
import qualified Data.IntMap as I
import qualified Data.HashMap.Strict as S
import qualified Data.ByteString.Char8 as B
import Bio.SamTools.Bam.Unsafe

data H = H { paired, orphans, links :: !Double} deriving Show

h0 :: H
h0 = H 0 0 0

type Hist = I.IntMap H
type TargetMap = S.HashMap B.ByteString Hist

data Conf = Conf { bucketsize :: Int, file :: FilePath }
          deriving (Typeable,Data,Eq,Show)

conf :: Conf
conf = Conf { bucketsize = 1000 &= help "Bucket size" &= typ "INT"
            , file = ""         &= args &= typFile }
       &= summary "bamcov - extract bucketwise coverage from a BAM file"
       &= program "bamcov"

main :: IO ()
main = do
  cf <- cmdArgs conf
  when (null $ file cf) $ error "Please give a BAM file as argument"
  bams <- readBams (file cf)
  let tm = foldl' (modifytargetmap (fromIntegral $ bucketsize cf)) S.empty bams
  -- print
  putStrLn $ unlines $ pprint (bucketsize cf) tm

-- Pretty-printing
pprint :: Int -> TargetMap -> [String]
pprint bs = concatMap printTgt . S.toList
  where printTgt (ctg,h) = map (printBucket ctg) $ interpolate $ I.toList h
        printBucket ctg (b,v) = intercalate "\t" [B.unpack ctg, show b, pr $ paired v, pr $ orphans v, pr $ links v]
        interpolate ((x1,v1):(x2,v2):rest) = (x1*bs,v1) 
                                             : if x2 == x1+1 then interpolate ((x2,v2):rest)
                                               else interpolate ((x1+1,h0):(x2,v2):rest)
        interpolate [(x,v)] = [(x*bs,v)]
        interpolate []      = [] -- shouldn't happen, I think
        pr = printf "%.2f"
        
-- Collecting bams into TargetMaps
modifytargetmap :: Offset -> TargetMap -> Bam1 -> TargetMap
modifytargetmap bz tm b = case targetName b of 
  Nothing -> tm
  Just tgt -> S.insert tgt (modifybuckets bz b $ fromMaybe I.empty $ S.lookup tgt tm) tm

-- When the target is determined, modify the buckets in the histogram
modifybuckets :: Offset -> Bam1 -> Hist -> Hist
modifybuckets bz b h = 
  let p = fromMaybe (err "couldn't determine position" b) $ position b
      l = fromMaybe (err "couldn't determine length" b)   $ queryLength b  
      (b1,bl) = p `divMod` bz
      (b2,bl2) = (p+l) `divMod` bz
      (//) x y = fromIntegral x / fromIntegral y
      buckets = if b2>b1 then (b1,1-bl//bz):[(x,1)| x <- [b1+1..b2-1]]++[(b2,bl2//bz)] 
                else [(b1,l//bz)]
      ins1 :: Hist -> (Offset,Double) -> Hist
      ins1 hist (bucket,value) = 
        let bu = fromIntegral bucket
            h' = modifyhist b value $ fromMaybe h0 (I.lookup bu hist)
        in h' `seq` I.insert bu h' hist
  in foldl' ins1 h buckets
 
-- Modify a single bucket record by a given amount  
modifyhist :: Bam1 -> Double -> H -> H
modifyhist b v (H p o l) 
  | isMateUnmap b                    = H p (o+v) l
  | mateTargetName b /= targetName b = H p o (l+v)
  | otherwise                        = H (p+v) o l

err :: String -> Bam1 -> a
err str b = error (str++"\n"++show b)