Add math.fsum() (#182)

pull/192/head
A. R. Shajii 2023-01-25 13:01:22 -05:00 committed by GitHub
parent bac6ae58dd
commit 5de12ee2f7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 217 additions and 0 deletions

View File

@ -587,6 +587,98 @@ def isclose(a: float, b: float, rel_tol: float = 1e-09, abs_tol: float = 0.0) ->
diff <= abs_tol
)
def fsum(seq):
def _fsum_realloc(p: Ptr[float], ps: Ptr[float], n: int, m: int):
from internal.gc import realloc, sizeof
v = Ptr[float]()
m += m
if n < m:
if p == ps:
v = Ptr[float](m)
str.memcpy(v.as_byte(), ps.as_byte(), n * sizeof(float))
else:
v = Ptr[float](realloc(p.as_byte(), m * sizeof(float), n * sizeof(float)))
return v, m
_NUM_PARTIALS: Static[int] = 32
ps_arr = __array__[float](_NUM_PARTIALS)
ps = ps_arr.ptr
p = ps
n, m = 0, _NUM_PARTIALS
xsave, special_sum, inf_sum = 0.0, 0.0, 0.0
hi, yr, lo = 0.0, 0.0, 0.0
for item in seq:
x = float(item)
xsave = x
i = 0
for j in range(n): # for y in partials
y = p[j]
if fabs(x) < fabs(y):
x, y = y, x
hi = x + y
yr = hi - x
lo = y - yr
if lo != 0.0:
p[i] = lo
i += 1
x = hi
n = i
if x != 0.0:
if not isfinite(x):
# a nonfinite x could arise either as
# a result of intermediate overflow, or
if isfinite(xsave):
raise OverflowError("intermediate overflow in fsum")
if isinf(xsave):
inf_sum += xsave
special_sum += xsave
# reset partials
n = 0
else:
if n >= m:
p, m = _fsum_realloc(p, ps, n, m)
p[n] = x
n += 1
if special_sum != 0.0:
if isnan(inf_sum):
raise ValueError("-inf + inf in fsum")
else:
return special_sum
hi = 0.0
if n > 0:
hi = p[n - 1]
n -= 1
# sum_exact(ps, hi) from the top, stop when the sum becomes inexact
while n > 0:
x = hi
y = p[n - 1]
n -= 1
# assert fabs(y) < fabs(x)
hi = x + y
yr = hi - x
lo = y - yr
if lo != 0.0:
break
# Make half-even rounding work across multiple partials.
# Needed so that sum([1e-16, 1, 1e16]) will round-up the last
# digit to two instead of down to zero (the 1e-16 makes the 1
# slightly closer to two). With a potential 1 ULP rounding
# error fixed-up, math.fsum() can guarantee commutativity.
if n > 0 and ((lo < 0.0 and p[n-1] < 0.0) or (lo > 0.0 and p[n-1] > 0.0)):
y = lo * 2.0
x = hi + y
yr = x - hi
if y == yr:
hi = x
return hi
# 32-bit float ops
e32 = float32(e)

View File

@ -461,6 +461,130 @@ def test_isclose():
assert math.isclose(INF, NINF) == False
@test
def test_fsum():
assert math.fsum((42,)) == 42.0
assert math.fsum((1,2,3)) == 6.0
assert math.fsum((1,2,-3)) == 0.0
assert math.fsum(()) == 0.0
assert math.fsum([.1] * 10) == 1.0
mant_dig = 53
etiny = -1074
def sum_exact(p):
n = len(p)
hi = 0.0
if n > 0:
hi = p[n-1]
n -= 1
while n > 0:
x = hi
y = p[n-1]
n -= 1
hi = x + y
yr = hi - x
lo = y - yr
if lo != 0.0:
break
if n > 0 and ((lo < 0 and p[n-1] < 0) or (lo > 0 and p[n-1] > 0)):
y = lo * 2
x = hi + y
yr = x - hi
if y == yr:
hi = x
return hi
def msum(iterable):
"Full precision summation using multiple floats for intermediate values"
# Rounded x+y stored in hi with the round-off stored in lo. Together
# hi+lo are exactly equal to x+y. The inner loop applies hi/lo summation
# to each partial so that the list of partial sums remains exact.
# Depends on IEEE-754 arithmetic guarantees. See proof of correctness at:
# www-2.cs.cmu.edu/afs/cs/project/quake/public/papers/robust-arithmetic.ps
partials = [] # sorted, non-overlapping partial sums
for x in iterable:
i = 0
for y in partials:
if abs(x) < abs(y):
x, y = y, x
hi = x + y
lo = y - (hi - x)
if lo:
partials[i] = lo
i += 1
x = hi
partials[i:] = [x]
return sum_exact(partials)
@pure
@llvm
def float1() -> float:
ret double 0x401DF11F45F4E61A
@pure
@llvm
def float2() -> float:
ret double 0xBFE62A2AF1BD3624
@pure
@llvm
def float3() -> float:
ret double 0x7C95555555555555
test_values = [
([], 0.0),
([0.0], 0.0),
([1e100, 1.0, -1e100, 1e-100, 1e50, -1.0, -1e50], 1e-100),
([2.0**53, -0.5, -2.0**-54], 2.0**53-1.0),
([2.0**53, 1.0, 2.0**-100], 2.0**53+2.0),
([2.0**53+10.0, 1.0, 2.0**-100], 2.0**53+12.0),
([2.0**53-4.0, 0.5, 2.0**-54], 2.0**53-3.0),
([1./n for n in range(1, 1001)], float1()),
([(-1.)**n/n for n in range(1, 1001)], float2()),
([1e16, 1., 1e-16], 10000000000000002.0),
([1e16-2., 1.-2.**-53, -(1e16-2.), -(1.-2.**-53)], 0.0),
# exercise code for resizing partials array
([2.**n - 2.**(n+50) + 2.**(n+52) for n in range(-1074, 972, 2)] +
[-2.**1022],
float3()),
]
# Telescoping sum, with exact differences (due to Sterbenz)
terms = [1.7**i for i in range(1001)]
test_values.append((
[terms[i+1] - terms[i] for i in range(1000)] + [-terms[1000]],
-terms[0]
))
for i, (vals, expected) in enumerate(test_values):
try:
actual = math.fsum(vals)
except OverflowError:
# self.fail("test %d failed: got OverflowError, expected %r "
# "for math.fsum(%.100r)" % (i, expected, vals))
assert False
except ValueError:
# self.fail("test %d failed: got ValueError, expected %r "
# "for math.fsum(%.100r)" % (i, expected, vals))
assert False
assert actual == expected
from random import random, gauss, shuffle
for j in range(10000):
vals = [7, 1e100, -7, -1e100, -9e-20, 8e-20] * 10
s = 0.
for i in range(200):
v = gauss(0, random()) ** 7 - s
s += v
vals.append(v)
shuffle(vals)
assert msum(vals) == math.fsum(vals)
test_isnan()
test_isinf()
test_isfinite()
@ -504,6 +628,7 @@ test_gcd()
test_frexp()
test_modf()
test_isclose()
test_fsum()
# 32-bit float ops