-- | Entropy of the distribution of fixed-length words (k-words) in a sequence.
module Bio.Sequence.Entropy (KWords(..), entropy) where

import Data.List

-- | Sequence types that can be cut into words of a requested length.
class KWords s where
    -- | @kwords k s@ returns the words of length @k@ found in @s@.
    kwords :: Int -> s -> [s]

-- | Words of a list are its contiguous windows of length @k@.
--
-- The windows are returned last-to-first (the window starting at the head
-- of the input comes last).  For @k <= 0@ every suffix contributes an empty
-- word, so the result is @length s + 1@ empty lists.
instance KWords [a] where
    -- Truncate every suffix to k elements.  The suffixes shorter than k sit
    -- at the end of 'tails', so after 'reverse' they form a prefix that
    -- 'dropWhile' can strip.
    kwords k = dropWhile ((< k) . length) . reverse . map (take k) . tails

-- | Entropy, in bits, of the k-words occurring in a sequence.
--
-- Simple sort-and-group implementation, but possibly sufficient; a Map of
-- words could be used for the counting instead.  This is the entropy of the
-- k-word distribution, which is NOT the same as the kth order entropy.
--
-- The result is @0@ when the sequence yields no k-words at all (for example
-- when @k@ exceeds its length) or only a single distinct k-word.
entropy :: (Ord str, KWords str) => Int -> str -> Double
entropy k s = sum . map (negate . nlogn) $ probs counts
  where
    -- Number of occurrences of each distinct k-word.
    counts = map (fromIntegral . length) . group . sort . kwords k $ s
    -- Each term is negated before summing (rather than negating the final
    -- sum) so that a zero entropy is reported as 0.0 and not as IEEE -0.0.

-- | @x * log2 x@, one (un-negated) term of the entropy sum.
nlogn :: (Floating a) => a -> a
nlogn x = x * log x / log 2

-- | Scale a list of counts so that the values sum to one.
probs :: (Fractional a) => [a] -> [a]
probs ls = map (/ total) ls
  where
    total = sum ls