mirror of https://github.com/exaloop/codon
stdlib/statistics.codon
parent
76e025305c
commit
cfc0f563f1
|
@ -1,22 +1,33 @@
|
|||
# (c) 2022 Exaloop Inc. All rights reserved.
|
||||
|
||||
import bisect
|
||||
import random
|
||||
from math import frexp as _frexp, floor as _floor, fabs as _fabs, erf as _erf
|
||||
from math import gcd as _gcd, exp as _exp, log as _log, sqrt as _sqrt, tau as _tau, hypot as _hypot
|
||||
from math import (
|
||||
gcd as _gcd,
|
||||
exp as _exp,
|
||||
log as _log,
|
||||
sqrt as _sqrt,
|
||||
tau as _tau,
|
||||
hypot as _hypot,
|
||||
)
|
||||
|
||||
|
||||
class StatisticsError:
|
||||
_hdr: ExcHeader
|
||||
|
||||
def __init__(self):
|
||||
self._hdr = ('StatisticsError', '', '', '', 0, 0)
|
||||
self._hdr = ("StatisticsError", "", "", "", 0, 0)
|
||||
|
||||
def __init__(self, message: str):
|
||||
self._hdr = ('StatisticsError', message, '', '', 0, 0)
|
||||
self._hdr = ("StatisticsError", message, "", "", 0, 0)
|
||||
|
||||
@property
|
||||
def message(self):
|
||||
return self._hdr.msg
|
||||
|
||||
def median[T](data: List[T]) -> float:
|
||||
|
||||
def median(data: List[T], T: type) -> float:
|
||||
"""
|
||||
Return the median (middle value) of numeric data.
|
||||
|
||||
|
@ -28,13 +39,14 @@ def median[T](data: List[T]) -> float:
|
|||
n = len(data)
|
||||
if n == 0:
|
||||
raise StatisticsError("no median for empty data")
|
||||
if n%2 == 1:
|
||||
return float(data[n//2])
|
||||
if n % 2 == 1:
|
||||
return float(data[n // 2])
|
||||
else:
|
||||
i = n//2
|
||||
i = n // 2
|
||||
return (data[i - 1] + data[i]) / 2
|
||||
|
||||
def median_low[T](data: List[T]) -> float:
|
||||
|
||||
def median_low(data: List[T], T: type) -> float:
|
||||
"""
|
||||
Return the low median of numeric data.
|
||||
|
||||
|
@ -45,12 +57,13 @@ def median_low[T](data: List[T]) -> float:
|
|||
n = len(data)
|
||||
if n == 0:
|
||||
raise StatisticsError("no median for empty data")
|
||||
if n%2 == 1:
|
||||
return float(data[n//2])
|
||||
if n % 2 == 1:
|
||||
return float(data[n // 2])
|
||||
else:
|
||||
return float(data[n//2 -1])
|
||||
return float(data[n // 2 - 1])
|
||||
|
||||
def median_high[T](data: List[T]) -> float:
|
||||
|
||||
def median_high(data: List[T], T: type) -> float:
|
||||
"""
|
||||
Return the high median of data.
|
||||
|
||||
|
@ -61,9 +74,10 @@ def median_high[T](data: List[T]) -> float:
|
|||
n = len(data)
|
||||
if n == 0:
|
||||
raise StatisticsError("no median for empty data")
|
||||
return float(data[n//2])
|
||||
return float(data[n // 2])
|
||||
|
||||
def _find_lteq[T](a: List[T], x: float):
|
||||
|
||||
def _find_lteq(a: List[T], x: float, T: type):
|
||||
"""
|
||||
Locate the leftmost value exactly equal to x
|
||||
"""
|
||||
|
@ -72,16 +86,18 @@ def _find_lteq[T](a: List[T], x: float):
|
|||
return i
|
||||
assert False
|
||||
|
||||
def _find_rteq[T](a: List[T], l: int, x: float):
|
||||
|
||||
def _find_rteq(a: List[T], l: int, x: float, T: type):
|
||||
"""
|
||||
Locate the rightmost value exactly equal to x
|
||||
"""
|
||||
i = bisect.bisect_right(a, x, lo=l)
|
||||
if i != (len(a)+1) and a[i-1] == x:
|
||||
return i-1
|
||||
if i != (len(a) + 1) and a[i - 1] == x:
|
||||
return i - 1
|
||||
assert False
|
||||
|
||||
def median_grouped[T,S=int](data: List[T], interval: S = 1) -> float:
|
||||
|
||||
def median_grouped(data: List[T], interval: S = 1, T: type, S: type = int) -> float:
|
||||
"""
|
||||
Return the 50th percentile (median) of grouped continuous data.
|
||||
"""
|
||||
|
@ -93,8 +109,8 @@ def median_grouped[T,S=int](data: List[T], interval: S = 1) -> float:
|
|||
return float(data[0])
|
||||
|
||||
# Find the value at the midpoint.
|
||||
x = float(data[n//2])
|
||||
L = x - float(interval)/2 # The lower limit of the median interval.
|
||||
x = float(data[n // 2])
|
||||
L = x - float(interval) / 2 # The lower limit of the median interval.
|
||||
|
||||
# Find the position of leftmost occurrence of x in data
|
||||
l1 = _find_lteq(data, x)
|
||||
|
@ -103,9 +119,10 @@ def median_grouped[T,S=int](data: List[T], interval: S = 1) -> float:
|
|||
l2 = _find_rteq(data, l1, x)
|
||||
cf = l1
|
||||
f = l2 - l1 + 1
|
||||
return L + interval*(n/2 - cf)/f
|
||||
return L + interval * (n / 2 - cf) / f
|
||||
|
||||
def mode[T](data: List[T]) -> T:
|
||||
|
||||
def mode(data: List[T], T: type) -> T:
|
||||
"""
|
||||
Return the most common data point from discrete or nominal data.
|
||||
"""
|
||||
|
@ -119,7 +136,8 @@ def mode[T](data: List[T]) -> T:
|
|||
elem = i
|
||||
return elem
|
||||
|
||||
def multimode[T](data: List[T]):
|
||||
|
||||
def multimode(data: List[T], T: type):
|
||||
"""
|
||||
Return a list of the most frequently occurring values.
|
||||
|
||||
|
@ -141,7 +159,10 @@ def multimode[T](data: List[T]):
|
|||
mulmode.append(i)
|
||||
return mulmode
|
||||
|
||||
def quantiles[T](data: List[T], n: int = 4, method: str = 'exclusive') -> List[float]:
|
||||
|
||||
def quantiles(
|
||||
data: List[T], n: int = 4, method: str = "exclusive", T: type
|
||||
) -> List[float]:
|
||||
"""
|
||||
Divide *data* into *n* continuous intervals with equal probability.
|
||||
|
||||
|
@ -159,32 +180,33 @@ def quantiles[T](data: List[T], n: int = 4, method: str = 'exclusive') -> List[f
|
|||
maximum value is treated as the 100th percentile.
|
||||
"""
|
||||
if n < 1:
|
||||
raise StatisticsError('n must be at least 1')
|
||||
raise StatisticsError("n must be at least 1")
|
||||
data = sorted(data)
|
||||
ld = len(data)
|
||||
if ld < 2:
|
||||
raise StatisticsError('must have at least two data points')
|
||||
raise StatisticsError("must have at least two data points")
|
||||
|
||||
if method == 'inclusive':
|
||||
if method == "inclusive":
|
||||
m = ld - 1
|
||||
result = List[float]()
|
||||
for i in range(1, n):
|
||||
j = i * m // n
|
||||
delta = (i * m) - (j * n)
|
||||
interpolated = (data[j] * (n - delta) + data[j+1] * delta) / n
|
||||
interpolated = (data[j] * (n - delta) + data[j + 1] * delta) / n
|
||||
result.append(interpolated)
|
||||
return result
|
||||
if method == 'exclusive':
|
||||
if method == "exclusive":
|
||||
m = ld + 1
|
||||
result = List[float]()
|
||||
for i in range(1, n):
|
||||
j = i * m // n # rescale i to m/n
|
||||
j = 1 if j < 1 else ld-1 if j > ld-1 else j # clamp to 1 .. ld-1
|
||||
delta = (i * m) - (j * n) # exact integer math
|
||||
interpolated = (data[j-1] * (n - delta) + data[j] * delta) / n
|
||||
j = i * m // n # rescale i to m/n
|
||||
j = 1 if j < 1 else ld - 1 if j > ld - 1 else j # clamp to 1 .. ld-1
|
||||
delta = (i * m) - (j * n) # exact integer math
|
||||
interpolated = (data[j - 1] * (n - delta) + data[j] * delta) / n
|
||||
result.append(interpolated)
|
||||
return result
|
||||
raise ValueError(f'Unknown method: {method}')
|
||||
raise ValueError(f"Unknown method: {method}")
|
||||
|
||||
|
||||
def _lcm(x: int, y: int):
|
||||
"""
|
||||
|
@ -233,6 +255,7 @@ def _sum(data: List[float]) -> float:
|
|||
i += 1
|
||||
return s + c
|
||||
|
||||
|
||||
def mean(data: List[float]) -> float:
|
||||
"""
|
||||
Return the sample arithmetic mean of data.
|
||||
|
@ -243,9 +266,10 @@ def mean(data: List[float]) -> float:
|
|||
"""
|
||||
n = len(data)
|
||||
if n < 1:
|
||||
raise StatisticsError('mean requires at least one data point')
|
||||
raise StatisticsError("mean requires at least one data point")
|
||||
total = _sum(data)
|
||||
return total/n
|
||||
return total / n
|
||||
|
||||
|
||||
'''
|
||||
def fmean(data: List[float]) -> float:
|
||||
|
@ -259,6 +283,7 @@ def fmean(data: List[float]) -> float:
|
|||
pass
|
||||
'''
|
||||
|
||||
|
||||
def geometric_mean(data: List[float]) -> float:
|
||||
"""
|
||||
Convert data to floats and compute the geometric mean.
|
||||
|
@ -274,6 +299,7 @@ def geometric_mean(data: List[float]) -> float:
|
|||
raise StatisticsError("geometric mean requires a non-empty dataset")
|
||||
return _exp(mean(list(map(_log, data))))
|
||||
|
||||
|
||||
def _fail_neg(values: List[float], errmsg: str):
|
||||
"""
|
||||
Iterate over values, failing if any are less than zero.
|
||||
|
@ -283,6 +309,7 @@ def _fail_neg(values: List[float], errmsg: str):
|
|||
raise StatisticsError(errmsg)
|
||||
yield x
|
||||
|
||||
|
||||
def harmonic_mean(data: List[float]) -> float:
|
||||
"""
|
||||
Return the harmonic mean of data.
|
||||
|
@ -307,9 +334,10 @@ def harmonic_mean(data: List[float]) -> float:
|
|||
for x in _fail_neg(data, errmsg):
|
||||
if x == 0.0:
|
||||
return 0.0
|
||||
li.append(1/x)
|
||||
li.append(1 / x)
|
||||
total = _sum(li)
|
||||
return n/total
|
||||
return n / total
|
||||
|
||||
|
||||
def _ss(data: List[float], c: float):
|
||||
"""
|
||||
|
@ -319,13 +347,14 @@ def _ss(data: List[float], c: float):
|
|||
from the mean are calculated in a second pass. Otherwise, deviations are
|
||||
calculated from c as given.
|
||||
"""
|
||||
total = _sum([(x-c)**2 for x in data])
|
||||
total2 = _sum([(x-c) for x in data])
|
||||
total = _sum([(x - c) ** 2 for x in data])
|
||||
total2 = _sum([(x - c) for x in data])
|
||||
|
||||
total -= total2**2/len(data)
|
||||
total -= total2 ** 2 / len(data)
|
||||
return total
|
||||
|
||||
def pvariance(data: List[float], mu: Optional[float]=None):
|
||||
|
||||
def pvariance(data: List[float], mu: Optional[float] = None):
|
||||
"""
|
||||
Return the population variance of `data`.
|
||||
|
||||
|
@ -336,24 +365,28 @@ def pvariance(data: List[float], mu: Optional[float]=None):
|
|||
TODO/CAVEATS:
|
||||
- Assumes input is a list of floats
|
||||
"""
|
||||
mu1 = ~mu if mu else mean(data)
|
||||
if mu is None:
|
||||
mu = mean(data)
|
||||
|
||||
n = len(data)
|
||||
if n < 1:
|
||||
raise StatisticsError("pvariance requires at least one data point")
|
||||
|
||||
ss = _ss(data, mu1)
|
||||
return ss/n
|
||||
ss = _ss(data, mu)
|
||||
return ss / n
|
||||
|
||||
def pstdev(data: List[float], mu: Optional[float]=None):
|
||||
|
||||
def pstdev(data: List[float], mu: Optional[float] = None):
|
||||
"""
|
||||
Return the square root of the population variance.
|
||||
"""
|
||||
mu1 = ~mu if mu else mean(data)
|
||||
var = pvariance(data, mu1)
|
||||
if mu is None:
|
||||
mu = mean(data)
|
||||
var = pvariance(data, mu)
|
||||
return _sqrt(var)
|
||||
|
||||
def variance(data: List[float], xbar: Optional[float]=None):
|
||||
|
||||
def variance(data: List[float], xbar: Optional[float] = None):
|
||||
"""
|
||||
Return the sample variance of data.
|
||||
|
||||
|
@ -361,36 +394,43 @@ def variance(data: List[float], xbar: Optional[float]=None):
|
|||
The optional argument xbar, if given, should be the mean of
|
||||
the data. If it is missing or None, the mean is automatically calculated.
|
||||
"""
|
||||
xbar1 = ~xbar if xbar else mean(data)
|
||||
if xbar is None:
|
||||
xbar = mean(data)
|
||||
n = len(data)
|
||||
if n < 2:
|
||||
raise StatisticsError("variance requires at least two data points")
|
||||
ss = _ss(data, xbar1)
|
||||
return ss/(n-1)
|
||||
ss = _ss(data, xbar)
|
||||
return ss / (n - 1)
|
||||
|
||||
def stdev(data, xbar: Optional[float]=None):
|
||||
|
||||
def stdev(data, xbar: Optional[float] = None):
|
||||
"""
|
||||
Return the square root of the sample variance.
|
||||
"""
|
||||
xbar1 = ~xbar if xbar else mean(data)
|
||||
var = variance(data, xbar1)
|
||||
if xbar is None:
|
||||
xbar = mean(data)
|
||||
var = variance(data, xbar)
|
||||
return _sqrt(var)
|
||||
|
||||
|
||||
class NormalDist:
|
||||
"""
|
||||
Normal distribution of a random variable
|
||||
"""
|
||||
|
||||
PRECISION: float
|
||||
_mu: float
|
||||
_sigma: float
|
||||
|
||||
def __eq__(self, other: NormalDist):
|
||||
return (self._mu - other._mu) < self.PRECISION and (self._sigma - other._sigma) < self.PRECISION
|
||||
return (self._mu - other._mu) < self.PRECISION and (
|
||||
self._sigma - other._sigma
|
||||
) < self.PRECISION
|
||||
|
||||
def _init(self, mu: float, sigma: float):
|
||||
self.PRECISION = 1e-6
|
||||
if sigma < 0.0:
|
||||
raise StatisticsError('sigma must be non-negative')
|
||||
raise StatisticsError("sigma must be non-negative")
|
||||
self._mu = mu
|
||||
self._sigma = sigma
|
||||
|
||||
|
@ -447,7 +487,7 @@ class NormalDist:
|
|||
"""
|
||||
variance = self._sigma ** 2.0
|
||||
if not variance:
|
||||
raise StatisticsError('pdf() not defined when sigma is zero')
|
||||
raise StatisticsError("pdf() not defined when sigma is zero")
|
||||
return _exp((x - self._mu) ** 2.0 / (-2.0 * variance)) / _sqrt(_tau * variance)
|
||||
|
||||
def cdf(self, x):
|
||||
|
@ -455,7 +495,7 @@ class NormalDist:
|
|||
Cumulative distribution function. P(X <= x)
|
||||
"""
|
||||
if not self._sigma:
|
||||
raise StatisticsError('cdf() not defined when sigma is zero')
|
||||
raise StatisticsError("cdf() not defined when sigma is zero")
|
||||
return 0.5 * (1.0 + _erf((x - self._mu) / (self._sigma * _sqrt(2.0))))
|
||||
|
||||
def _normal_dist_inv_cdf(self, p: float, mu: float, sigma: float):
|
||||
|
@ -470,22 +510,55 @@ class NormalDist:
|
|||
if _fabs(q) <= 0.425:
|
||||
r = 0.180625 - q * q
|
||||
# Hash sum: 55.88319_28806_14901_4439
|
||||
num = (((((((2.5090809287301226727e+3 * r +
|
||||
3.3430575583588128105e+4) * r +
|
||||
6.7265770927008700853e+4) * r +
|
||||
4.5921953931549871457e+4) * r +
|
||||
1.3731693765509461125e+4) * r +
|
||||
1.9715909503065514427e+3) * r +
|
||||
1.3314166789178437745e+2) * r +
|
||||
3.3871328727963666080e+0) * q
|
||||
den = (((((((5.2264952788528545610e+3 * r +
|
||||
2.8729085735721942674e+4) * r +
|
||||
3.9307895800092710610e+4) * r +
|
||||
2.1213794301586595867e+4) * r +
|
||||
5.3941960214247511077e+3) * r +
|
||||
6.8718700749205790830e+2) * r +
|
||||
4.2313330701600911252e+1) * r +
|
||||
1.0)
|
||||
num = (
|
||||
(
|
||||
(
|
||||
(
|
||||
(
|
||||
(
|
||||
(
|
||||
2.5090809287301226727e3 * r
|
||||
+ 3.3430575583588128105e4
|
||||
)
|
||||
* r
|
||||
+ 6.7265770927008700853e4
|
||||
)
|
||||
* r
|
||||
+ 4.5921953931549871457e4
|
||||
)
|
||||
* r
|
||||
+ 1.3731693765509461125e4
|
||||
)
|
||||
* r
|
||||
+ 1.9715909503065514427e3
|
||||
)
|
||||
* r
|
||||
+ 1.3314166789178437745e2
|
||||
)
|
||||
* r
|
||||
+ 3.3871328727963666080e0
|
||||
) * q
|
||||
den = (
|
||||
(
|
||||
(
|
||||
(
|
||||
(
|
||||
(5.2264952788528545610e3 * r + 2.8729085735721942674e4)
|
||||
* r
|
||||
+ 3.9307895800092710610e4
|
||||
)
|
||||
* r
|
||||
+ 2.1213794301586595867e4
|
||||
)
|
||||
* r
|
||||
+ 5.3941960214247511077e3
|
||||
)
|
||||
* r
|
||||
+ 6.8718700749205790830e2
|
||||
)
|
||||
* r
|
||||
+ 4.2313330701600911252e1
|
||||
) * r + 1.0
|
||||
x = num / den
|
||||
return mu + (x * sigma)
|
||||
r = p if q <= 0.0 else 1.0 - p
|
||||
|
@ -493,41 +566,105 @@ class NormalDist:
|
|||
if r <= 5.0:
|
||||
r = r - 1.6
|
||||
# Hash sum: 49.33206_50330_16102_89036
|
||||
num = (((((((7.74545014278341407640e-4 * r +
|
||||
2.27238449892691845833e-2) * r +
|
||||
2.41780725177450611770e-1) * r +
|
||||
1.27045825245236838258e+0) * r +
|
||||
3.64784832476320460504e+0) * r +
|
||||
5.76949722146069140550e+0) * r +
|
||||
4.63033784615654529590e+0) * r +
|
||||
1.42343711074968357734e+0)
|
||||
den = (((((((1.05075007164441684324e-9 * r +
|
||||
5.47593808499534494600e-4) * r +
|
||||
1.51986665636164571966e-2) * r +
|
||||
1.48103976427480074590e-1) * r +
|
||||
6.89767334985100004550e-1) * r +
|
||||
1.67638483018380384940e+0) * r +
|
||||
2.05319162663775882187e+0) * r +
|
||||
1.0)
|
||||
num = (
|
||||
(
|
||||
(
|
||||
(
|
||||
(
|
||||
(
|
||||
7.74545014278341407640e-4 * r
|
||||
+ 2.27238449892691845833e-2
|
||||
)
|
||||
* r
|
||||
+ 2.41780725177450611770e-1
|
||||
)
|
||||
* r
|
||||
+ 1.27045825245236838258e0
|
||||
)
|
||||
* r
|
||||
+ 3.64784832476320460504e0
|
||||
)
|
||||
* r
|
||||
+ 5.76949722146069140550e0
|
||||
)
|
||||
* r
|
||||
+ 4.63033784615654529590e0
|
||||
) * r + 1.42343711074968357734e0
|
||||
den = (
|
||||
(
|
||||
(
|
||||
(
|
||||
(
|
||||
(
|
||||
1.05075007164441684324e-9 * r
|
||||
+ 5.47593808499534494600e-4
|
||||
)
|
||||
* r
|
||||
+ 1.51986665636164571966e-2
|
||||
)
|
||||
* r
|
||||
+ 1.48103976427480074590e-1
|
||||
)
|
||||
* r
|
||||
+ 6.89767334985100004550e-1
|
||||
)
|
||||
* r
|
||||
+ 1.67638483018380384940e0
|
||||
)
|
||||
* r
|
||||
+ 2.05319162663775882187e0
|
||||
) * r + 1.0
|
||||
else:
|
||||
r = r - 5.0
|
||||
# Hash sum: 47.52583_31754_92896_71629
|
||||
num = (((((((2.01033439929228813265e-7 * r +
|
||||
2.71155556874348757815e-5) * r +
|
||||
1.24266094738807843860e-3) * r +
|
||||
2.65321895265761230930e-2) * r +
|
||||
2.96560571828504891230e-1) * r +
|
||||
1.78482653991729133580e+0) * r +
|
||||
5.46378491116411436990e+0) * r +
|
||||
6.65790464350110377720e+0)
|
||||
den = (((((((2.04426310338993978564e-15 * r +
|
||||
1.42151175831644588870e-7) * r +
|
||||
1.84631831751005468180e-5) * r +
|
||||
7.86869131145613259100e-4) * r +
|
||||
1.48753612908506148525e-2) * r +
|
||||
1.36929880922735805310e-1) * r +
|
||||
5.99832206555887937690e-1) * r +
|
||||
1.0)
|
||||
num = (
|
||||
(
|
||||
(
|
||||
(
|
||||
(
|
||||
(
|
||||
2.01033439929228813265e-7 * r
|
||||
+ 2.71155556874348757815e-5
|
||||
)
|
||||
* r
|
||||
+ 1.24266094738807843860e-3
|
||||
)
|
||||
* r
|
||||
+ 2.65321895265761230930e-2
|
||||
)
|
||||
* r
|
||||
+ 2.96560571828504891230e-1
|
||||
)
|
||||
* r
|
||||
+ 1.78482653991729133580e0
|
||||
)
|
||||
* r
|
||||
+ 5.46378491116411436990e0
|
||||
) * r + 6.65790464350110377720e0
|
||||
den = (
|
||||
(
|
||||
(
|
||||
(
|
||||
(
|
||||
(
|
||||
2.04426310338993978564e-15 * r
|
||||
+ 1.42151175831644588870e-7
|
||||
)
|
||||
* r
|
||||
+ 1.84631831751005468180e-5
|
||||
)
|
||||
* r
|
||||
+ 7.86869131145613259100e-4
|
||||
)
|
||||
* r
|
||||
+ 1.48753612908506148525e-2
|
||||
)
|
||||
* r
|
||||
+ 1.36929880922735805310e-1
|
||||
)
|
||||
* r
|
||||
+ 5.99832206555887937690e-1
|
||||
) * r + 1.0
|
||||
x = num / den
|
||||
if q < 0.0:
|
||||
x = -x
|
||||
|
@ -542,12 +679,12 @@ class NormalDist:
|
|||
probability.
|
||||
"""
|
||||
if p <= 0.0 or p >= 1.0:
|
||||
raise StatisticsError('p must be in the range 0.0 < p < 1.0')
|
||||
raise StatisticsError("p must be in the range 0.0 < p < 1.0")
|
||||
if self._sigma <= 0.0:
|
||||
raise StatisticsError('cdf() not defined when sigma at or below zero')
|
||||
raise StatisticsError("cdf() not defined when sigma at or below zero")
|
||||
return self._normal_dist_inv_cdf(p, self._mu, self._sigma)
|
||||
|
||||
def quantiles(self, n: int=4):
|
||||
def quantiles(self, n: int = 4):
|
||||
"""
|
||||
Divide into *n* continuous intervals with equal probability.
|
||||
|
||||
|
@ -557,7 +694,7 @@ class NormalDist:
|
|||
Set *n* to 100 for percentiles which gives the 99 cuts points that
|
||||
separate the normal distribution in to 100 equal sized groups.
|
||||
"""
|
||||
return [self.inv_cdf(float(i)/float(n)) for i in range(1, n)]
|
||||
return [self.inv_cdf(float(i) / float(n)) for i in range(1, n)]
|
||||
|
||||
def overlap(self, other: NormalDist) -> float:
|
||||
"""
|
||||
|
@ -569,13 +706,13 @@ class NormalDist:
|
|||
"""
|
||||
X, Y = self, other
|
||||
|
||||
if (Y._sigma, Y._mu) < (X._sigma, X._mu): # sort to assure commutativity
|
||||
if (Y._sigma, Y._mu) < (X._sigma, X._mu): # sort to assure commutativity
|
||||
X, Y = Y, X
|
||||
|
||||
X_var, Y_var = X.variance, Y.variance
|
||||
|
||||
if not X_var or not Y_var:
|
||||
raise StatisticsError('overlap() not defined when sigma is zero')
|
||||
raise StatisticsError("overlap() not defined when sigma is zero")
|
||||
|
||||
dv = Y_var - X_var
|
||||
dm = _fabs(Y._mu - X._mu)
|
||||
|
@ -584,7 +721,7 @@ class NormalDist:
|
|||
return 1.0 - _erf(dm / (2.0 * X._sigma * _sqrt(2.0)))
|
||||
|
||||
a = X._mu * Y_var - Y._mu * X_var
|
||||
b = X._sigma * Y._sigma * _sqrt(dm**2.0 + dv * _log(Y_var / X_var))
|
||||
b = X._sigma * Y._sigma * _sqrt(dm ** 2.0 + dv * _log(Y_var / X_var))
|
||||
x1 = (a + b) / dv
|
||||
x2 = (a - b) / dv
|
||||
|
||||
|
@ -689,4 +826,4 @@ class NormalDist:
|
|||
return hash((self._mu, self._sigma))
|
||||
|
||||
def __repr__(self):
|
||||
return f'NormalDist(mu={self._mu}, sigma={self._sigma})'
|
||||
return f"NormalDist(mu={self._mu}, sigma={self._sigma})"
|
||||
|
|
Loading…
Reference in New Issue