# codon/test/stdlib/statistics_test.codon
#
# 333 lines
# 9.7 KiB
# Python

import statistics
import math
@test
def med():
    # Exercises statistics.median on int and float inputs of both odd and
    # even length.

    # Even number of ints: expected result lies halfway between 3 and 4.
    six_ints = [1, 2, 3, 4, 5, 6]
    assert statistics.median(six_ints) == 3.5

    # Odd number of ints: expected result is the middle element.
    seven_ints = [1, 2, 3, 4, 5, 6, 9]
    assert statistics.median(seven_ints) == 4

    # Odd number of fractional floats (multiples of one seventh).
    odd_sevenths = [k/7 for k in range(1, 6)]
    assert statistics.median(odd_sevenths) == 3/7

    # Even number of fractional floats (multiples of one seventh).
    even_sevenths = [k/7 for k in range(1, 7)]
    assert statistics.median(even_sevenths) == 1/2

    # Odd number of decimal-valued floats.
    decimals = [2.5, 3.1, 4.2, 5.7, 5.8]
    assert statistics.median(decimals) == 4.2


med()
@test
def med_low():
    # Exercises statistics.median_low on even-length inputs; every expected
    # value is the smaller of the two middle elements.

    # Even number of ints.
    ints = [1, 2, 3, 4, 5, 6]
    assert statistics.median_low(ints) == 3

    # Even number of fractional floats (multiples of one seventh).
    sevenths = [k/7 for k in range(1, 7)]
    assert statistics.median_low(sevenths) == 3/7

    # Even number of decimal-valued floats.
    decimals = [1.1, 2.2, 3.3, 4.4, 5.5, 6.6]
    assert statistics.median_low(decimals) == 3.3


med_low()
@test
def med_high():
    # Exercises statistics.median_high on even-length inputs; every expected
    # value is the larger of the two middle elements.

    # Even number of ints.
    ints = [1, 2, 3, 4, 5, 6]
    assert statistics.median_high(ints) == 4

    # Even number of fractional floats (multiples of one seventh).
    sevenths = [k/7 for k in range(1, 7)]
    assert statistics.median_high(sevenths) == 4/7

    # Even number of decimal-valued floats.
    decimals = [1.1, 2.2, 3.3, 4.4, 5.5, 6.6]
    assert statistics.median_high(decimals) == 4.4


med_high()
@test
def med_grouped():
    # Exercises statistics.median_grouped on int and float inputs.

    # Odd-length int data in which the middle value (14) is repeated.
    odd_ints = [12, 13, 14, 14, 14, 15, 15]
    assert statistics.median_grouped(odd_ints) == 14
    odd_ints = [12, 13, 14, 14, 14, 14, 15]
    assert statistics.median_grouped(odd_ints) == 13.875

    # Same idea, passing 5 as the second argument (the class interval in
    # Python's statistics API).
    wide_ints = [5, 10, 10, 15, 20, 20, 20, 20, 25, 25, 30]
    assert statistics.median_grouped(wide_ints, 5) == 19.375

    # Even-length int data with repeated values around the middle.
    even_ints = [2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6]
    assert statistics.median_grouped(even_ints) == 4.5
    even_ints = [3, 4, 4, 4, 5, 5, 5, 5, 6, 6]
    assert statistics.median_grouped(even_ints) == 4.75

    # A single float: the result is that value.
    lone_value = [3.2]
    assert statistics.median_grouped(lone_value) == 3.2

    # Odd number of fractional floats (quarters).
    quarters = [5/4, 9/4, 13/4, 13/4, 17/4]
    assert statistics.median_grouped(quarters) == 3.0

    # Even number of fractional floats (quarters).
    quarters = [5/4, 9/4, 13/4, 13/4, 17/4, 17/4]
    assert statistics.median_grouped(quarters) == 3.25

    # Odd number of decimal-valued floats.
    decimals = [5.5, 6.5, 6.5, 7.5, 8.5]
    assert statistics.median_grouped(decimals) == 6.75

    # Even number of decimal-valued floats.
    decimals = [5.5, 5.5, 6.5, 6.5, 7.5, 8.5]
    assert statistics.median_grouped(decimals) == 6.5


med_grouped()
@test
def test_mode():
    # Exercises statistics.mode on numeric and string data.

    # One clear winner: 14 occurs three times.
    numbers = [12, 13, 14, 14, 14, 15, 15]
    assert statistics.mode(numbers) == 14

    # Every value occurs once, so the first element is expected.
    assert statistics.mode(list(range(20, 50, 3))) == 20

    # Nominal (string) data.
    letters = ['a', 'b', 'c', 'b', 'd', 'b']
    assert statistics.mode(letters) == 'b'
    words = ['fe', 'fi', 'fo', 'fum', 'fi', 'fi']
    assert statistics.mode(words) == 'fi'

    # Bimodal data: 2 and 6 both occur four times; the one seen first is
    # expected.
    bimodal = [1, 1, 2, 2, 2, 2, 3, 4, 5, 6, 6, 6, 6, 7, 8, 9, 9]
    assert statistics.mode(bimodal) == 2

    # All data points unique: again the first element is expected.
    assert statistics.mode(list(range(10))) == 0


test_mode()
@test
def test_multimode():
    # Exercises statistics.multimode; the expected lists contain every value
    # tied for the highest count, in order of first appearance in the data.

    # Two modes: 2 and 6 each occur four times.
    numbers = [1, 1, 2, 2, 2, 2, 3, 4, 5, 6, 6, 6, 6, 7, 8, 9, 9]
    assert statistics.multimode(numbers) == [2, 6]

    # A single mode: 'b' occurs eight times.
    one_mode = ['a'] * 2 + ['b'] * 8 + ['c'] * 2
    assert statistics.multimode(one_mode) == ['b']

    # Three modes: 'b', 'd' and 'f' each occur four times.
    three_modes = (['a'] * 2 + ['b'] * 4 + ['c'] * 2 + ['d'] * 4
                   + ['e'] * 2 + ['f'] * 4 + ['g'] * 2)
    assert statistics.multimode(three_modes) == ['b', 'd', 'f']


test_multimode()
@test
def test_quantiles():
    # Exercises statistics.quantiles with the default and the 'inclusive'
    # method.

    # Constant data of length 2..9: all three default cut points equal the
    # constant under both methods.
    constant_cuts = [10.0, 10.0, 10.0]
    for size in range(2, 10):
        constant = [10.0] * size
        assert statistics.quantiles(constant) == constant_cuts
        assert statistics.quantiles(constant, method='inclusive') == constant_cuts

    # Inclusive method on a fixed data set: each entry pairs a number of
    # groups n with the n - 1 expected cut points.
    samples = [100, 200, 400, 800]
    inclusive_cases = [
        (2, [300.0]),
        (3, [200.0, 400.0]),
        (4, [175.0, 300.0, 500.0]),
        (5, [160.0, 240.0, 360.0, 560.0]),
        (6, [150.0, 200.0, 300.0, 400.0, 600.0]),
        (8, [137.5, 175.0, 225.0, 300.0, 375.0, 500.0, 650.0]),
        (10, [130.0, 160.0, 190.0, 240.0, 300.0, 360.0, 440.0, 560.0, 680.0]),
        (12, [125.0, 150.0, 175.0, 200.0, 250.0, 300.0, 350.0, 400.0, 500.0,
              600.0, 700.0]),
        (15, [120.0, 140.0, 160.0, 180.0, 200.0, 240.0, 280.0, 320.0, 360.0,
              400.0, 480.0, 560.0, 640.0, 720.0]),
    ]
    for n, expected in inclusive_cases:
        assert statistics.quantiles(samples, n=n, method="inclusive") == expected


test_quantiles()
@test
def test_mean():
    # Exercises statistics.mean on three float data sets whose means are
    # compared for exact equality.
    powers = [100.0, 200.0, 400.0, 800.0]
    assert statistics.mean(powers) == 375.0

    readings = [17.25, 19.75, 20.0, 21.5, 21.75, 23.25, 25.125, 27.5]
    assert statistics.mean(readings) == 22.015625

    digits = [0.0, 1.0, 2.0, 3.0, 3.0, 3.0, 4.0, 5.0,
              5.0, 6.0, 7.0, 7.0, 7.0, 7.0, 8.0, 9.0]
    assert statistics.mean(digits) == 4.8125


test_mean()
@test
def test_geometric_mean():
    # Exercises statistics.geometric_mean; each result must lie within
    # `tolerance` of the listed expected value.
    tolerance = 1e-6
    cases = [
        ([54.0, 24.0, 36.0], 36.0),
        ([4.0, 9.0], 6.0),
        ([17.625], 17.625),
        ([3.5, 4.0, 5.25], 4.18886),
    ]
    for values, expected in cases:
        assert math.fabs(statistics.geometric_mean(values) - expected) < tolerance


test_geometric_mean()
@test
def test_harmonic_mean():
    # Exercises statistics.harmonic_mean with exact comparisons.

    # Data containing a zero: the expected result is 0.
    with_zero = [1.0, 0.0, 2.0]
    assert statistics.harmonic_mean(with_zero) == 0

    # Whole-valued floats.
    whole = [2.0, 4.0, 4.0, 8.0, 16.0, 16.0]
    assert statistics.harmonic_mean(whole) == 6*4/5

    # Fractional floats.
    fractional = [1/8, 1/4, 1/4, 1/2, 1/2]
    assert statistics.harmonic_mean(fractional) == 1/4

    # A single data point is its own harmonic mean.
    for k in range(1, 101):
        value = float(k)
        assert statistics.harmonic_mean([value]) == value


test_harmonic_mean()
@test
def test_pvariance():
    # Exercises statistics.pvariance (population variance) with exact
    # comparisons.

    # 0.0 .. 9999.0: compared against the closed form (N**2 - 1) / 12.
    ramp = [float(i) for i in range(10000)]
    assert statistics.pvariance(ramp) == (10000**2 - 1)/12

    # Small whole-valued data set.
    whole = [4.0, 7.0, 13.0, 16.0]
    assert statistics.pvariance(whole) == 22.5

    # Small fractional data set (quarters).
    quarters = [1/4, 1/4, 3/4, 7/4]
    assert statistics.pvariance(quarters) == 3/8


test_pvariance()
@test
def test_pstdev():
    # Checks that statistics.pstdev equals the square root of
    # statistics.pvariance on each data set.
    datasets = [
        [float(i) for i in range(10000)],
        [4.0, 7.0, 13.0, 16.0],
        [1/4, 1/4, 3/4, 7/4],
    ]
    for values in datasets:
        assert statistics.pstdev(values) == math.sqrt(statistics.pvariance(values))


test_pstdev()
@test
def test_variance():
    # Exercises statistics.variance (sample variance) with exact comparisons.

    # Small whole-valued data set.
    whole = [4.0, 7.0, 13.0, 16.0]
    assert statistics.variance(whole) == 30.0

    # Small fractional data set (quarters).
    quarters = [1/4, 1/4, 3/4, 7/4]
    assert statistics.variance(quarters) == 1/2


test_variance()
@test
def test_stdev():
    # Checks that statistics.stdev equals the square root of
    # statistics.variance on each data set.
    datasets = [
        [4.0, 7.0, 13.0, 16.0],
        [1/4, 1/4, 3/4, 7/4],
    ]
    for values in datasets:
        assert statistics.stdev(values) == math.sqrt(statistics.variance(values))


test_stdev()
@test
def test_mean_NormalDist():
    # A NormalDist built from (mu, sigma) must report mu as its mean.
    mu, sigma = 10000.0, 3.0
    dist = statistics.NormalDist(mu, sigma)
    assert dist.mean == 10000.0


test_mean_NormalDist()
@test
def test_stdev_NormalDist():
    # A NormalDist built with sigma = 3.0 must report 3.0 as its stdev.
    #
    # Named with the _NormalDist suffix (like test_mean_NormalDist) so it does
    # not redefine the module-level test_stdev that covers statistics.stdev.
    X = statistics.NormalDist(10000.0, 3.0)
    assert X.stdev == 3.0


test_stdev_NormalDist()
@test
def test_variance_NormalDist():
    # A NormalDist built with sigma = 3.0 must report sigma squared (9.0) as
    # its variance.
    #
    # Named with the _NormalDist suffix (like test_mean_NormalDist) so it does
    # not redefine the module-level test_variance that covers
    # statistics.variance.
    X = statistics.NormalDist(10000.0, 3.0)
    assert X.variance == 9.0


test_variance_NormalDist()
@test
def test_pdf():
    # Exercises NormalDist.pdf for a distribution centred on 100.0.
    tolerance = 1e-6
    dist = statistics.NormalDist(100.0, 15.0)

    # The density at the centre must exceed the density one unit either side.
    assert dist.pdf(99.0) < dist.pdf(100.0)
    assert dist.pdf(101.0) < dist.pdf(100.0)

    # The density must be symmetric about the centre, to within `tolerance`.
    for step in range(50):
        offset = float(step)
        gap = dist.pdf(100.0 - offset) - dist.pdf(100.0 + offset)
        assert math.fabs(gap) < tolerance


test_pdf()
@test
def test_cdf():
    # The cumulative probability at the centre of the distribution is
    # compared exactly against one half.
    centre = 100.0
    dist = statistics.NormalDist(centre, 15.0)
    assert dist.cdf(centre) == 0.50


test_cdf()
@test
def test_inv_cdf():
    # Exercises NormalDist.inv_cdf by round-tripping probabilities through
    # inv_cdf and cdf.
    tolerance = 1e-6
    iq = statistics.NormalDist(100.0, 15.0)

    # The inverse CDF at one half must be exactly the mean.
    assert iq.inv_cdf(0.50) == iq.mean

    # Fifty exponents, two probabilities each (one per tail): from 1/2 down
    # to 1 / 2**50 in the lower tail and up to (2**50 - 1) / 2**50 in the
    # upper tail.
    for exponent in range(1, 51):
        lower_p = 2.0 ** (-exponent)
        assert math.fabs(iq.cdf(iq.inv_cdf(lower_p)) - lower_p) < tolerance
        upper_p = 1.0 - lower_p
        assert math.fabs(iq.cdf(iq.inv_cdf(upper_p)) - upper_p) < tolerance


test_inv_cdf()
@test
def test_ND_qualtiles():
    # Exercises NormalDist.quantiles on the standard normal distribution.
    # Each case pairs a number of groups n with the expected cut points,
    # compared to within PRECISION.
    PRECISION = 1e-6
    Z = statistics.NormalDist(0.0, 1.0)
    for n, expected in [
        (2, [0.0]),
        (3, [-0.430727, 0.430727]),
        (4, [-0.67449, 0.0, 0.67449]),
    ]:
        actual = Z.quantiles(n)
        # Compare lengths first: walking only the expected values would let
        # surplus cut points in `actual` go unnoticed.
        assert len(actual) == len(expected)
        for got, want in zip(actual, expected):
            assert math.fabs(got - want) < PRECISION


test_ND_qualtiles()
@test
def test_overlap():
    # Exercises NormalDist.overlap against published reference values and
    # checks that the result does not depend on argument order.
    tolerance = 1e-5
    cases = [
        (statistics.NormalDist(0.0, 2.0), statistics.NormalDist(1.0, 2.0), 0.80258),
        (statistics.NormalDist(0.0, 1.0), statistics.NormalDist(1.0, 2.0), 0.60993),
    ]
    for first, second, published in cases:
        assert math.fabs(first.overlap(second) - published) < tolerance
        assert math.fabs(second.overlap(first) - published) < tolerance


test_overlap()
@test
def test_samples():
    # NormalDist.samples(n) must return exactly n values; only the count is
    # checked here, not the values themselves.
    count = 1000
    dist = statistics.NormalDist(10000.0, 3.0)
    drawn = dist.samples(count)
    assert len(drawn) == count


test_samples()
@test
def test_from_samples():
    # NormalDist.from_samples must produce a distribution equal to
    # NormalDist(99.0, 9.0) for this data set.
    observations = [96.0, 107.0, 90.0, 92.0, 110.0]
    expected = statistics.NormalDist(99.0, 9.0)
    fitted = statistics.NormalDist.from_samples(observations)
    assert fitted == expected


test_from_samples()