-- | Extract links from contig ends from a BAM file
--   Build an associative map from ContigID to links.
module ContigEnds where

import Bio.SamTools.Bam
import Bio.SamTools.BamIndex
import Bio.SamTools.Classify
import qualified Data.IntMap as M
import Data.List (sort, sortBy, groupBy)
import Control.Monad (when)

import Base

collect_links :: FilePath -> IO (Int,Links,Reads)
collect_links f = do
  -- get contig list, then iterate through to extract all links, store in map
  st <- statistics `fmap` (classify . take 1000) `fmap` readBams f
  hdr <- withBamInFile f (\h -> return $ inHeader h)
  hx <- open f
  let n   = nTargets hdr
      tsl = M.fromList $ zip [0..] 
            $ map (\x -> (name x,fromIntegral (len x))) $ targetSeqList hdr
  print n
  lft <- gen_links (left_links hx) [0..n-1]
  rgt <- gen_links (right_links tsl hx) [0..n-1]
  return (n,Links st lft rgt,tsl)

gen_links fn = go M.empty
  where go m (i:is) = do
          ls <- filterdups `fmap` fn i
          go (if null ls then m else M.insert i ls m) is
        go m [] = return m

filterdups [] = []
filterdups [x] = [x]
filterdups (x:y:zs) = 
  if and [f x == f y | f <- [targetID, mateTargetID]]
     && and [f x == f y | f <- [position, matePosition]]
  then filterdups (y:zs)
  else x : filterdups (y:zs)

-- | Extract from the BAM/BAI files the links (i.e. matching reads whose 
--   mate is on a different contig) in the corresponding direction.

left_links :: IdxHandle-> ContigID -> IO [Bam1]
left_links i c = filter isLeft `fmap` readBamRegion i c (0,350)  
    where isLeft b = not (isSecondary b) && isReverse b 
                     && mateTargetID b /= targetID b -- assuming "innie"
          
right_links :: Reads -> IdxHandle-> ContigID -> IO [Bam1]
right_links r i c = do
  let l = targetlength r c
  filter isRight `fmap` readBamRegion i c (l-350,l)
  where isRight b = not (isSecondary b) && not (isReverse b) 
                    && mateTargetID b /= targetID b -- assuming "innie"  
