From 5de12ee2f722dc51e4db62a5bad3311f180d12b5 Mon Sep 17 00:00:00 2001 From: "A. R. Shajii" Date: Wed, 25 Jan 2023 13:01:22 -0500 Subject: [PATCH] Add math.fsum() (#182) --- stdlib/math.codon | 92 ++++++++++++++++++++++++++ test/stdlib/math_test.codon | 125 ++++++++++++++++++++++++++++++++++++ 2 files changed, 217 insertions(+) diff --git a/stdlib/math.codon b/stdlib/math.codon index 61938bf9..61780dd5 100644 --- a/stdlib/math.codon +++ b/stdlib/math.codon @@ -587,6 +587,98 @@ def isclose(a: float, b: float, rel_tol: float = 1e-09, abs_tol: float = 0.0) -> diff <= abs_tol ) +def fsum(seq): + def _fsum_realloc(p: Ptr[float], ps: Ptr[float], n: int, m: int): + from internal.gc import realloc, sizeof + v = Ptr[float]() + m += m + if n < m: + if p == ps: + v = Ptr[float](m) + str.memcpy(v.as_byte(), ps.as_byte(), n * sizeof(float)) + else: + v = Ptr[float](realloc(p.as_byte(), m * sizeof(float), n * sizeof(float))) + return v, m + + _NUM_PARTIALS: Static[int] = 32 + ps_arr = __array__[float](_NUM_PARTIALS) + ps = ps_arr.ptr + p = ps + n, m = 0, _NUM_PARTIALS + xsave, special_sum, inf_sum = 0.0, 0.0, 0.0 + hi, yr, lo = 0.0, 0.0, 0.0 + + for item in seq: + x = float(item) + xsave = x + i = 0 + + for j in range(n): # for y in partials + y = p[j] + if fabs(x) < fabs(y): + x, y = y, x + hi = x + y + yr = hi - x + lo = y - yr + if lo != 0.0: + p[i] = lo + i += 1 + x = hi + + n = i + if x != 0.0: + if not isfinite(x): + # a nonfinite x could arise either as + # a result of intermediate overflow, or + if isfinite(xsave): + raise OverflowError("intermediate overflow in fsum") + if isinf(xsave): + inf_sum += xsave + special_sum += xsave + # reset partials + n = 0 + else: + if n >= m: + p, m = _fsum_realloc(p, ps, n, m) + p[n] = x + n += 1 + + if special_sum != 0.0: + if isnan(inf_sum): + raise ValueError("-inf + inf in fsum") + else: + return special_sum + + hi = 0.0 + if n > 0: + hi = p[n - 1] + n -= 1 + # sum_exact(ps, hi) from the top, stop when the sum becomes inexact + while n > 0: + x = hi + y = p[n - 1] + n -= 1 + # assert fabs(y) < fabs(x) + hi = x + y + yr = hi - x + lo = y - yr + if lo != 0.0: + break + + # Make half-even rounding work across multiple partials. + # Needed so that sum([1e-16, 1, 1e16]) will round-up the last + # digit to two instead of down to zero (the 1e-16 makes the 1 + # slightly closer to two). With a potential 1 ULP rounding + # error fixed-up, math.fsum() can guarantee commutativity. + if n > 0 and ((lo < 0.0 and p[n-1] < 0.0) or (lo > 0.0 and p[n-1] > 0.0)): + y = lo * 2.0 + x = hi + y + yr = x - hi + if y == yr: + hi = x + + return hi + # 32-bit float ops e32 = float32(e) diff --git a/test/stdlib/math_test.codon b/test/stdlib/math_test.codon index c72bd53f..c27d8f06 100644 --- a/test/stdlib/math_test.codon +++ b/test/stdlib/math_test.codon @@ -461,6 +461,130 @@ def test_isclose(): assert math.isclose(INF, NINF) == False +@test +def test_fsum(): + assert math.fsum((42,)) == 42.0 + assert math.fsum((1,2,3)) == 6.0 + assert math.fsum((1,2,-3)) == 0.0 + assert math.fsum(()) == 0.0 + assert math.fsum([.1] * 10) == 1.0 + + mant_dig = 53 + etiny = -1074 + + def sum_exact(p): + n = len(p) + hi = 0.0 + if n > 0: + hi = p[n-1] + n -= 1 + while n > 0: + x = hi + y = p[n-1] + n -= 1 + hi = x + y + yr = hi - x + lo = y - yr + if lo != 0.0: + break + + if n > 0 and ((lo < 0 and p[n-1] < 0) or (lo > 0 and p[n-1] > 0)): + y = lo * 2 + x = hi + y + yr = x - hi + if y == yr: + hi = x + return hi + + def msum(iterable): + "Full precision summation using multiple floats for intermediate values" + # Rounded x+y stored in hi with the round-off stored in lo. Together + # hi+lo are exactly equal to x+y. The inner loop applies hi/lo summation + # to each partial so that the list of partial sums remains exact. + # Depends on IEEE-754 arithmetic guarantees. See proof of correctness at: + # www-2.cs.cmu.edu/afs/cs/project/quake/public/papers/robust-arithmetic.ps + + partials = [] # sorted, non-overlapping partial sums + for x in iterable: + i = 0 + for y in partials: + if abs(x) < abs(y): + x, y = y, x + hi = x + y + lo = y - (hi - x) + if lo: + partials[i] = lo + i += 1 + x = hi + partials[i:] = [x] + return sum_exact(partials) + + @pure + @llvm + def float1() -> float: + ret double 0x401DF11F45F4E61A + + @pure + @llvm + def float2() -> float: + ret double 0xBFE62A2AF1BD3624 + + @pure + @llvm + def float3() -> float: + ret double 0x7C95555555555555 + + test_values = [ + ([], 0.0), + ([0.0], 0.0), + ([1e100, 1.0, -1e100, 1e-100, 1e50, -1.0, -1e50], 1e-100), + ([2.0**53, -0.5, -2.0**-54], 2.0**53-1.0), + ([2.0**53, 1.0, 2.0**-100], 2.0**53+2.0), + ([2.0**53+10.0, 1.0, 2.0**-100], 2.0**53+12.0), + ([2.0**53-4.0, 0.5, 2.0**-54], 2.0**53-3.0), + ([1./n for n in range(1, 1001)], float1()), + ([(-1.)**n/n for n in range(1, 1001)], float2()), + ([1e16, 1., 1e-16], 10000000000000002.0), + ([1e16-2., 1.-2.**-53, -(1e16-2.), -(1.-2.**-53)], 0.0), + # exercise code for resizing partials array + ([2.**n - 2.**(n+50) + 2.**(n+52) for n in range(-1074, 972, 2)] + + [-2.**1022], + float3()), + ] + + # Telescoping sum, with exact differences (due to Sterbenz) + terms = [1.7**i for i in range(1001)] + test_values.append(( + [terms[i+1] - terms[i] for i in range(1000)] + [-terms[1000]], + -terms[0] + )) + + for i, (vals, expected) in enumerate(test_values): + try: + actual = math.fsum(vals) + except OverflowError: + # self.fail("test %d failed: got OverflowError, expected %r " + # "for math.fsum(%.100r)" % (i, expected, vals)) + assert False + except ValueError: + # self.fail("test %d failed: got ValueError, expected %r " + # "for math.fsum(%.100r)" % (i, expected, vals)) + assert False + + assert actual == expected + + from random import random, gauss, shuffle + for j in range(10000): + vals = [7, 1e100, -7, -1e100, -9e-20, 8e-20] * 10 + s = 0. + for i in range(200): + v = gauss(0, random()) ** 7 - s + s += v + vals.append(v) + shuffle(vals) + assert msum(vals) == math.fsum(vals) + + test_isnan() test_isinf() test_isfinite() @@ -504,6 +628,7 @@ test_gcd() test_frexp() test_modf() test_isclose() +test_fsum() # 32-bit float ops