```#!/usr/bin/env python
# -*- coding: UTF-8 -*-

"""
Some math formula for various calculations
"""

import sys
import numpy as np

from math import log, exp
from Bio.Cluster import distancematrix

from jcvi.utils.cbook import human_size

def spearmanr(x, y):
"""
Michiel de Hoon's library (available in BioPython or standalone as
PyCluster) returns Spearman rsb which does include a tie correction.

>>> x = [5.05, 6.75, 3.21, 2.66]
>>> y = [1.65, 26.5, -5.93, 7.96]
>>> z = [1.65, 2.64, 2.64, 6.95]
>>> round(spearmanr(x, y), 4)
0.4
>>> round(spearmanr(x, z), 4)
-0.6325
"""
if not x or not y:
return 0
return 1 - distancematrix((x, y), dist="s")[1][0]

def geometric_mean(a, b, cast=int):
return cast((a * b) ** .5)

def reject_outliers(a, threshold=3.5):
"""
Iglewicz and Hoaglin's robust test for multiple outliers (two sided test).
<http://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm>

<http://contchart.com/outliers.aspx>

>>> a = [0, 1, 2, 4, 12, 58, 188, 189]
>>> reject_outliers(a)
array([False, False, False, False, False,  True,  True,  True], dtype=bool)
"""
if len(a) < 3:
return np.zeros(len(a), dtype=bool)

A = np.array(a, dtype=float)
M = np.median(A)
D = np.absolute(A - M)
Mi = np.absolute(0.6745 * (A - M) / MAD)
#return A, M, D, MAD, Mi
return Mi > 3.5

def recomb_probability(cM, method="kosambi"):
"""
<http://statgen.ncsu.edu/qtlcart/manual/node46.html>

>>> recomb_probability(1)
0.009998666879965463
>>> recomb_probability(100)
0.48201379003790845
>>> recomb_probability(10000)
0.5
"""
assert method in ("kosambi", "haldane")
d = cM / 100.
if method == "kosambi":
e4d = exp(4 * d)
return (e4d - 1) / (e4d + 1) / 2
elif method == "haldane":
return (1 - exp(-2 * d)) / 2

def jukesCantorD(p, L=100):
"""
>>> jukesCantorD(.1)
(0.10732563273050497, 0.001198224852071006)
>>> jukesCantorD(.7)
(2.0310376508266565, 0.47249999999999864)
"""
assert 0 <= p < .75

rD = 1 - 4. / 3 * p
D = -.75 * log(rD)
varD = p * (1 - p) / (rD ** 2 * L)

return D, varD

def jukesCantorP(D):
"""
>>> jukesCantorP(.1)
0.09362001071778939
>>> jukesCantorP(2)
0.6978874115828988
"""
rD = exp(-4. / 3 * D)
p = .75 * (1 - rD)
return p

"""
Calculate velvet memory requirement.

Ram required for velvetg = -109635 + 18977*ReadSize + 86326*GenomeSize +

Genome size is in millions of bases (Mb)
Number of reads is in millions
K is the kmer hash value used in velveth
"""
ram = -109635 + 18977 * readsize + 86326 * genomesize + \
233353 * numreads - 51092 * K
print >> sys.stderr, "GenomeSize: {0}Mb".format(genomesize)