1 module Bio.Sequence.Entropy (KWords(..), entropy) where
    2 
    3 import Data.List
    4 
    5 class KWords s where
    6    kwords :: Int -> s -> [s]
    7 
    8 instance KWords [a] where
    9    kwords k = dropWhile ((<k) . length) . reverse . map (take k) . tails
   10 
   11 -- naïve implementation, but possibly sufficient
   12 -- could use a Map of words instead
   13 -- this calculates the entropy of the k-words in the string
   14 -- this is NOT the same as kth order entropy
   15 entropy :: (Ord str, KWords str) => Int -> str -> Double
   16 entropy k s = negate . sum . map nlogn $ probs ls
   17     where ls = map (fromIntegral . length) . group . sort . kwords k $ s
   18 
   19 nlogn :: (Floating a) => a -> a
   20 nlogn x = x*log x/log 2
   21 probs :: (Fractional a) => [a] -> [a]
   22 probs ls = map (/ sum ls) ls