-- | Statistics for calculating probabilities from the binomial distribution

module Statistics where
import Test.QuickCheck hiding (choose)

-- | Number of combinations for choosing k from a set of n objects.  Needs to use Integer to
--   avoid running out of Int bits.
choose :: Integral i => i -> i -> Integer
n' `choose` k' = let (n,k) = (fromIntegral n', fromIntegral k')
                 in product [k+1..n] `div` product [1..n-k]

-- | Calculate binomial probability of observing exactly k positives from n trials,
--   with a probability p for a positive result in each trial.
binomial :: Integral i => Double -> i -> i -> Double
binomial p n k -- | p*fromIntegral n > 20 && (1-p)*fromIntegral n > 20  = normal_apporx p n k
               | k <= n    = fromIntegral (n `choose` k) * (p**fromIntegral k) * ((1-p)** fromIntegral (n-k))
               | otherwise = error ("binomial: can't observe more than 'n' positives:"++show n++" "++show k)

prop_binom p n k = k <= n && p>=0 && p <= 1 ==> sum [ binomial p n k | k <- [0..n]] - 1 < 0.001


-- | Calculate cumulative binomial probability of achieving k or fewer positive results.
cumbin :: Integral i => Double -> i -> i -> Double
cumbin p n k = sum [binomial p n x | x <- [0..k]]

prop_cumbin p n k = k <= n && n-k-1 <= n && p>=0 && p <= 1
                    ==> cumbin p n k + cumbin (1-p) n (n-k-1) - 1 < 0.001

{- crap! this won't work, you can't just sum normals, goddamit.
normal_approx p n k = pdf_normal (fromIntegral n*p) (fromIntegral n*p*(1-p)) (fromIntegral k)
pdf_normal mu sigma x = exp(negate((x-mu)^2/(2*sigma^2)))/(sigma*sqrt(2*pi))

invcumnorm mu sigma z = mu + search (-limit*sigma) (limit*sigma)
    where search a b = let c = (a+b)/2
                           cn = cumnorm 0 sigma c
                       in if abs (z - cn) < 10*epsilon || abs (a-b) < epsilon then c
                            else if cn > z then search a c
                                 else search c b
-}