-- clusterLibs
-- calculate clusters by library, using the lib table
-- read patterns from a table, first line is header

module Main where

import System.Environment (getArgs)
import Data.List (intersperse)
import Numeric (showFFloat)

import Statistics
import Formats (LibTable, Cluster
               , readPatternTable, totalsByLib, readClusters, countClusters, classify)

main :: IO ()
main = do
   [ps,cs] <- do [xs,ys] <- getArgs 
                 return [xs,ys] 
             `catch` error "Usage: clusterlibs <libtable> <clusters>"
   pat <- readPatternTable ps
   counts <- totalsByLib pat cs
   writeTable [("","sum":map snd pat++["significance"])]
   writeTable . map (decorate counts) . countClusters pat =<< readClusters cs

-- | Using count by library, takes a cluster, calculates significance scores 
--   and formats it for output.
decorate :: (String,[Int]) -> (String,[Int]) -> (String,[String])
decorate counts (x,ys) = 
    let fractions = [ fromIntegral x / fromIntegral (sum $ snd counts) | x <- snd counts]
        clustersize = sum ys
        significance = unwords $ zipWith isSignificant ys fractions
        isSignificant obs frac = showFFloat (Just 3) (pvalue (1-frac) clustersize (clustersize-obs)) ""
    in (x,map show (clustersize:ys) ++ [significance])

-- --------------------------------------------------
-- | Calculate the p value of observing k sequences
--   from a library, given an a priori background distribution. 
pvalue :: Double -> Int -> Int -> Double
pvalue fraction tot obs = cumbin fraction tot obs

-- --------------------------------------------------
-- will need to match against all, to check for multiple matches
-- tag names with library
classClusters :: LibTable -> FilePath -> IO [Cluster]
classClusters ps f = return . map class1 =<< readClusters f
    where class1 (l,ss) = (l, map (\s -> classify ps s++":"++s) ss)

-- | Output the clusters to stdout
writeTable :: [(String,[String])] -> IO ()
writeTable = putStrLn . unlines . map show1 
    where
      show1 (name,stuff) = concat $ intersperse "\t" (name:stuff)

