# codon/test/stdlib/statistics_test.codon

import statistics
import math

@test
def med():
    """Check statistics.median on int and float inputs of odd and even length."""
    # Even number of int data points.
    assert statistics.median([1, 2, 3, 4, 5, 6]) == 3.5

    # Odd number of int data points.
    assert statistics.median([1, 2, 3, 4, 5, 6, 9]) == 4

    # Float inputs: sevenths with an odd count, sevenths with an even count,
    # and decimal literals with an odd count.
    float_cases = [
        ([1/7, 2/7, 3/7, 4/7, 5/7], 3/7),
        ([1/7, 2/7, 3/7, 4/7, 5/7, 6/7], 1/2),
        ([2.5, 3.1, 4.2, 5.7, 5.8], 4.2),
    ]
    for values, expected in float_cases:
        assert statistics.median(values) == expected
med()

@test
def med_low():
    """Check statistics.median_low on even-length int and float inputs."""
    # Even number of ints.
    assert statistics.median_low([1, 2, 3, 4, 5, 6]) == 3

    # Even number of floats: first written as sevenths, then as decimal literals.
    float_cases = [
        ([1/7, 2/7, 3/7, 4/7, 5/7, 6/7], 3/7),
        ([1.1, 2.2, 3.3, 4.4, 5.5, 6.6], 3.3),
    ]
    for values, expected in float_cases:
        assert statistics.median_low(values) == expected
med_low()

@test
def med_high():
    """Check statistics.median_high on even-length int and float inputs."""
    # Even number of ints.
    assert statistics.median_high([1, 2, 3, 4, 5, 6]) == 4

    # Even number of floats: first written as sevenths, then as decimal literals.
    float_cases = [
        ([1/7, 2/7, 3/7, 4/7, 5/7, 6/7], 4/7),
        ([1.1, 2.2, 3.3, 4.4, 5.5, 6.6], 4.4),
    ]
    for values, expected in float_cases:
        assert statistics.median_high(values) == expected
med_high()

@test
def med_grouped():
    """Check statistics.median_grouped on int and float inputs."""
    # Int data in which the median value is repeated (default interval).
    assert statistics.median_grouped([12, 13, 14, 14, 14, 15, 15]) == 14
    assert statistics.median_grouped([12, 13, 14, 14, 14, 14, 15]) == 13.875

    # Same idea with an explicit class interval of 5.
    spaced = [5, 10, 10, 15, 20, 20, 20, 20, 25, 25, 30]
    assert statistics.median_grouped(spaced, 5) == 19.375

    # Even-length int data with repeated values around the middle.
    assert statistics.median_grouped([2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6]) == 4.5
    assert statistics.median_grouped([3, 4, 4, 4, 5, 5, 5, 5, 6, 6]) == 4.75

    # Float data, all using the default interval.
    float_cases = [
        # A single data point.
        ([3.2], 3.2),
        # Quarters, odd count.
        ([5/4, 9/4, 13/4, 13/4, 17/4], 3.0),
        # Quarters, even count.
        ([5/4, 9/4, 13/4, 13/4, 17/4, 17/4], 3.25),
        # Decimal literals, odd count.
        ([5.5, 6.5, 6.5, 7.5, 8.5], 6.75),
        # Decimal literals, even count.
        ([5.5, 5.5, 6.5, 6.5, 7.5, 8.5], 6.5),
    ]
    for values, expected in float_cases:
        assert statistics.median_grouped(values) == expected
med_grouped()

@test
def test_mode():
    """Check statistics.mode on int and string data."""
    int_cases = [
        ([12, 13, 14, 14, 14, 15, 15], 14),
        (list(range(20, 50, 3)), 20),
        # Bimodal data: 2 and 6 both occur four times; 2 is expected.
        ([1, 1, 2, 2, 2, 2, 3, 4, 5, 6, 6, 6, 6, 7, 8, 9, 9], 2),
        # All data points unique; the first one is expected.
        (list(range(10)), 0),
    ]
    for values, expected in int_cases:
        assert statistics.mode(values) == expected

    # Nominal (string) data.
    str_cases = [
        (['a', 'b', 'c', 'b', 'd', 'b'], 'b'),
        (['fe', 'fi', 'fo', 'fum', 'fi', 'fi'], 'fi'),
    ]
    for labels, winner in str_cases:
        assert statistics.mode(labels) == winner
test_mode()

@test
def test_multimode():
    """Check statistics.multimode on int and string data."""
    # Two int values tie for the highest count.
    ints = [1, 1, 2, 2, 2, 2, 3, 4, 5, 6, 6, 6, 6, 7, 8, 9, 9]
    assert statistics.multimode(ints) == [2, 6]

    # A single string value dominates.
    one_winner = ['a', 'a', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'c', 'c']
    assert statistics.multimode(one_winner) == ['b']

    # Three string values tie for the highest count.
    three_winners = ['a', 'a', 'b', 'b', 'b', 'b', 'c', 'c', 'd', 'd', 'd', 'd', 'e', 'e', 'f', 'f', 'f', 'f', 'g', 'g']
    assert statistics.multimode(three_winners) == ['b', 'd', 'f']
test_multimode()

@test
def test_quantiles():
    """Check statistics.quantiles on constant data and on a fixed table of cut points."""
    # Constant data: every quartile equals the constant, for both the default
    # method and the 'inclusive' method.
    flat = [10.0, 10.0, 10.0]
    for size in range(2, 10):
        constant = [10.0] * size
        assert statistics.quantiles(constant) == flat
        assert statistics.quantiles(constant, method='inclusive') == flat

    # Expected cut points for the 'inclusive' method, keyed by the number of groups.
    sample = [100, 200, 400, 800]
    inclusive_cases = [
        (2, [300.0]),
        (3, [200.0, 400.0]),
        (4, [175.0, 300.0, 500.0]),
        (5, [160.0, 240.0, 360.0, 560.0]),
        (6, [150.0, 200.0, 300.0, 400.0, 600.0]),
        (8, [137.5, 175.0, 225.0, 300.0, 375.0, 500.0, 650.0]),
        (10, [130.0, 160.0, 190.0, 240.0, 300.0, 360.0, 440.0, 560.0, 680.0]),
        (12, [125.0, 150.0, 175.0, 200.0, 250.0, 300.0, 350.0, 400.0, 500.0, 600.0, 700.0]),
        (15, [120.0, 140.0, 160.0, 180.0, 200.0, 240.0, 280.0, 320.0, 360.0, 400.0, 480.0, 560.0, 640.0, 720.0]),
    ]
    for groups, cuts in inclusive_cases:
        assert statistics.quantiles(sample, n=groups, method="inclusive") == cuts
test_quantiles()

@test
def test_mean():
    """Check statistics.mean against exact expected values for float data."""
    cases = [
        ([100.0, 200.0, 400.0, 800.0], 375.0),
        ([17.25, 19.75, 20.0, 21.5, 21.75, 23.25, 25.125, 27.5], 22.015625),
        ([0.0, 1.0, 2.0, 3.0, 3.0, 3.0, 4.0, 5.0, 5.0, 6.0, 7.0, 7.0, 7.0, 7.0, 8.0, 9.0], 4.8125),
    ]
    for values, expected in cases:
        assert statistics.mean(values) == expected
test_mean()

@test
def test_geometric_mean():
    """Check statistics.geometric_mean to within an absolute tolerance of 1e-6."""
    tolerance = 1e-6
    cases = [
        ([54.0, 24.0, 36.0], 36.0),
        ([4.0, 9.0], 6.0),
        ([17.625], 17.625),
        ([3.5, 4.0, 5.25], 4.18886),
    ]
    for values, expected in cases:
        assert math.fabs(statistics.geometric_mean(values) - expected) < tolerance
test_geometric_mean()

@test
def test_harmonic_mean():
    """Check statistics.harmonic_mean against exact expected values."""
    # Data containing a zero is expected to give a harmonic mean of zero.
    assert statistics.harmonic_mean([1.0, 0.0, 2.0]) == 0

    assert statistics.harmonic_mean([2.0, 4.0, 4.0, 8.0, 16.0, 16.0]) == 6*4/5

    assert statistics.harmonic_mean([1/8, 1/4, 1/4, 1/2, 1/2]) == 1/4

    # A single data point is its own harmonic mean.
    for k in range(1, 101):
        value = float(k)
        assert statistics.harmonic_mean([value]) == value
test_harmonic_mean()

@test
def test_pvariance():
    """Check statistics.pvariance against exact expected values."""
    # The values 0.0 .. 9999.0 are compared against (count**2 - 1) / 12.
    count = 10000
    ramp = [float(i) for i in range(count)]
    assert statistics.pvariance(ramp) == (count**2 - 1)/12

    assert statistics.pvariance([4.0, 7.0, 13.0, 16.0]) == 22.5

    assert statistics.pvariance([1/4, 1/4, 3/4, 7/4]) == 3/8
test_pvariance()

@test
def test_pstdev():
    """Check that statistics.pstdev equals the square root of statistics.pvariance."""
    datasets = [
        [float(i) for i in range(10000)],
        [4.0, 7.0, 13.0, 16.0],
        [1/4, 1/4, 3/4, 7/4],
    ]
    for values in datasets:
        assert statistics.pstdev(values) == math.sqrt(statistics.pvariance(values))
test_pstdev()

@test
def test_variance():
    """Check statistics.variance (sample variance) against exact expected values."""
    whole = [4.0, 7.0, 13.0, 16.0]
    assert statistics.variance(whole) == 30.0

    quarters = [1/4, 1/4, 3/4, 7/4]
    assert statistics.variance(quarters) == 1/2
test_variance()

@test
def test_stdev():
    """Check that statistics.stdev equals the square root of statistics.variance."""
    datasets = [
        [4.0, 7.0, 13.0, 16.0],
        [1/4, 1/4, 3/4, 7/4],
    ]
    for values in datasets:
        assert statistics.stdev(values) == math.sqrt(statistics.variance(values))
test_stdev()

@test
def test_mean_NormalDist():
    """Check that NormalDist.mean returns the mu it was constructed with."""
    mu = 10000.0
    dist = statistics.NormalDist(mu, 3.0)
    assert dist.mean == mu
test_mean_NormalDist()

@test
def test_stdev_NormalDist():
    """Check that NormalDist.stdev returns the sigma it was constructed with.

    Named with the ``_NormalDist`` suffix (like ``test_mean_NormalDist``) so it
    does not redefine the earlier ``test_stdev`` that covers statistics.stdev.
    """
    sigma = 3.0
    dist = statistics.NormalDist(10000.0, sigma)
    assert dist.stdev == sigma
test_stdev_NormalDist()

@test
def test_variance_NormalDist():
    """Check that NormalDist.variance is the square of the constructed sigma.

    Named with the ``_NormalDist`` suffix (like ``test_mean_NormalDist``) so it
    does not redefine the earlier ``test_variance`` that covers
    statistics.variance.
    """
    dist = statistics.NormalDist(10000.0, 3.0)
    # sigma = 3.0, so the variance must be exactly 9.0.
    assert dist.variance == 9.0
test_variance_NormalDist()

@test
def test_pdf():
    """Check that NormalDist.pdf peaks at the mean and is symmetric around it."""
    tolerance = 1e-6
    center = 100.0
    dist = statistics.NormalDist(center, 15.0)

    # The density one unit either side of the mean is below the density at the mean.
    peak = dist.pdf(center)
    assert dist.pdf(99.0) < peak
    assert dist.pdf(101.0) < peak

    # Points equally far below and above the mean have matching densities.
    for step in range(50):
        offset = float(step)
        below = dist.pdf(center - offset)
        above = dist.pdf(center + offset)
        assert math.fabs((below - above)) < tolerance
test_pdf()

@test
def test_cdf():
    """Check that NormalDist.cdf evaluated at the mean is exactly one half."""
    center = 100.0
    dist = statistics.NormalDist(center, 15.0)
    # Compared exactly, with no tolerance.
    assert dist.cdf(center) == 0.50
test_cdf()

@test
def test_inv_cdf():
    """Check NormalDist.inv_cdf at the median and as the inverse of cdf in the tails."""
    tolerance = 1e-6
    dist = statistics.NormalDist(100.0, 15.0)
    assert dist.inv_cdf(0.50) == dist.mean

    # Round-trip cdf(inv_cdf(p)) for one hundred probabilities: 2**-e and its
    # complement for e = 1..50, reaching 1 / 2**50 and (2**50 - 1) / 2**50.
    for e in range(1, 51):
        lower = 2.0 ** (-e)
        for p in [lower, 1.0 - lower]:
            assert math.fabs(dist.cdf(dist.inv_cdf(p)) - p) < tolerance
test_inv_cdf()

@test
def test_ND_qualtiles():
    """Check NormalDist.quantiles on the standard normal distribution.

    For each group count n, the returned cut points must match the expected
    list both in length and, element by element, to within 1e-6.
    """
    PRECISION = 1e-6
    Z = statistics.NormalDist(0.0, 1.0)
    for n, expected in [
        (2, [0.0]),
        (3, [-0.430727, 0.430727]),
        (4, [-0.67449, 0.0, 0.67449]),
    ]:
        actual = Z.quantiles(n)
        # Without this, extra cut points in `actual` would go unnoticed.
        assert len(actual) == len(expected)
        # zip pairs values directly and cannot index past a too-short result.
        for got, want in zip(actual, expected):
            assert math.fabs(got - want) < PRECISION
test_ND_qualtiles()

@test
def test_overlap():
    """Check NormalDist.overlap against fixed reference values, in both argument orders."""
    tolerance = 1e-5
    cases = [
        (statistics.NormalDist(0.0, 2.0), statistics.NormalDist(1.0, 2.0), 0.80258),
        (statistics.NormalDist(0.0, 1.0), statistics.NormalDist(1.0, 2.0), 0.60993),
    ]
    for left, right, published in cases:
        # The same reference value is expected whichever distribution is the receiver.
        assert math.fabs(left.overlap(right) - published) < tolerance
        assert math.fabs(right.overlap(left) - published) < tolerance
test_overlap()

@test
def test_samples():
    """Check that NormalDist.samples returns the requested number of values."""
    count = 1000
    dist = statistics.NormalDist(10000.0, 3.0)
    drawn = dist.samples(count)
    # Only the length is checked here, not the drawn values.
    assert len(drawn) == count
test_samples()

@test
def test_from_samples():
    """Check that NormalDist.from_samples fits the expected mu and sigma."""
    observations = [96.0, 107.0, 90.0, 92.0, 110.0]
    fitted = statistics.NormalDist.from_samples(observations)
    # The fitted distribution must compare equal to NormalDist(99.0, 9.0).
    assert fitted == statistics.NormalDist(99.0, 9.0)
test_from_samples()