# statistics.py
# Source Generated with Decompyle++
# File: statistics.pyc (Python 3.13)
# Public names exported by ``from <this module> import *``.
__all__ = [
'NormalDist',
'StatisticsError',
'correlation',
'covariance',
'fmean',
'geometric_mean',
'harmonic_mean',
'linear_regression',
'mean',
'median',
'median_grouped',
'median_high',
'median_low',
'mode',
'multimode',
'pstdev',
'pvariance',
'quantiles',
'stdev',
'variance']
import math
import numbers
import random
import sys
from fractions import Fraction
from decimal import Decimal
from itertools import groupby, repeat
from bisect import bisect_left, bisect_right
from math import hypot, sqrt, fabs, exp, erf, tau, log, fsum
from functools import reduce
from operator import mul
from collections import Counter, namedtuple, defaultdict
# Square root of two, computed once at import time.
_SQRT2 = sqrt(2)
class StatisticsError(ValueError):
    """Error raised by the functions in this module for invalid data.

    It subclasses ValueError, so callers that catch ValueError also
    catch this exception.
    """
def _sum(data):
    """Return an exact sum of *data* as the tuple ``(T, total, count)``.

    ``T`` is the type obtained by folding ``_coerce`` over every type seen
    in *data* (starting from ``int``), ``total`` is the sum, and ``count``
    is the number of items consumed.

    Each value is converted to an integer ratio by ``_exact_ratio`` and the
    numerators are accumulated per denominator, so the final total is an
    exact Fraction.  A denominator key of ``None`` marks a non-finite
    value; when present, that entry alone is used as the total.

    Raises TypeError (from ``_coerce`` / ``_exact_ratio``) for unsupported
    or incompatible types.
    """
    count = 0
    types = set()
    types_add = types.add
    partials = {}
    partials_get = partials.get
    for typ, values in groupby(data, type):
        types_add(typ)
        for n, d in map(_exact_ratio, values):
            count += 1
            partials[d] = partials_get(d, 0) + n
    if None in partials:
        # The finite partials are irrelevant once a NAN/INF has been seen.
        total = partials[None]
    else:
        total = sum(Fraction(n, d) for d, n in partials.items())
    T = reduce(_coerce, types, int)  # may raise TypeError
    return (T, total, count)
def _ss(data, c=None):
    """Return ``(T, ssd, c, count)`` for the sum of square deviations.

    When *c* is given, the squared deviations ``(x - c) ** 2`` are summed
    with ``_sum`` and *c* is returned unchanged.

    When *c* is None, a single pass accumulates exact partial sums of the
    values and of their squares (keyed by denominator); the mean ``c`` and
    the sum of square deviations about it are then computed from those.
    For empty data both ``ssd`` and ``c`` are ``Fraction(0)``.

    ``T`` is the coerced result type and ``count`` the number of items.
    """
    if c is not None:
        T, ssd, count = _sum((d := x - c) * d for x in data)
        return (T, ssd, c, count)
    count = 0
    types = set()
    types_add = types.add
    sx_partials = defaultdict(int)
    sxx_partials = defaultdict(int)
    for typ, values in groupby(data, type):
        types_add(typ)
        for n, d in map(_exact_ratio, values):
            count += 1
            sx_partials[d] += n
            sxx_partials[d] += n * n
    if not count:
        ssd = c = Fraction(0)
    elif None in sx_partials:
        # A NAN/INF was seen; it dominates every finite partial sum.
        ssd = c = sx_partials[None]
    else:
        sx = sum(Fraction(n, d) for d, n in sx_partials.items())
        sxx = sum(Fraction(n, d * d) for d, n in sxx_partials.items())
        # Numerically poor for floats, but exact with Fractions.
        ssd = (count * sxx - sx * sx) / count
        c = sx / count
    T = reduce(_coerce, types, int)  # may raise TypeError
    return (T, ssd, c, count)
def _isfinite(x):
return x.is_finite()
if AttributeError:
return
def _coerce(T, S):
if T is S:
return T
if None is int or S is bool:
return T
if None is int:
return S
if None(S, T):
return S
if None(T, S):
return T
if None(T, int):
return S
if None(S, int):
return T
if None(T, Fraction) and issubclass(S, float):
return S
if None(T, float) and issubclass(S, Fraction):
return T
msg = None
raise TypeError(msg % (T.__name__, S.__name__))
def _exact_ratio(x):
return x.as_integer_ratio()
if AttributeError:
pass
elif (OverflowError, ValueError):
return
return (x.numerator, x.denominator)
if AttributeError:
raise TypeError(msg)
def _convert(value, T):
if type(value) is T:
return value
if None(T, int) and value.denominator < 1:
T = float
return T(value)
if TypeError:
if issubclass(T, Decimal):
return
def _fail_neg(values, errmsg = ('negative value',)):
def _fail_neg():
# Return a generator
for x in values:
if x < 0:
raise StatisticsError(errmsg)
yield None
return None
def _integer_sqrt_of_frac_rto(n = None, m = None):
a = math.isqrt(n // m)
return a | (a * a * m < n)
# Working width in bits used by _float_sqrt_of_frac(): twice the platform
# float mantissa width plus three extra bits.
_sqrt_bit_width: int = 2 * sys.float_info.mant_dig + 3
def _float_sqrt_of_frac(n: int, m: int) -> float:
    """Return the square root of n/m as a float.

    The fraction is rescaled by a power of four so that its integer square
    root (rounded to odd) carries about ``_sqrt_bit_width`` bits, and the
    scaling is undone in the final true division that produces the float.
    """
    q = (n.bit_length() - m.bit_length() - _sqrt_bit_width) // 2
    if q >= 0:
        # n/m is large: shrink it by scaling the denominator up.
        numerator = _integer_sqrt_of_frac_rto(n, m << 2 * q) << q
        denominator = 1
    else:
        # n/m is small: grow it by scaling the numerator up (-q > 0).
        numerator = _integer_sqrt_of_frac_rto(n << -2 * q, m)
        denominator = 1 << -q
    return numerator / denominator  # int / int -> float
def _decimal_sqrt_of_frac(n = None, m = None):
if n < 0:
if not n:
return Decimal('0.0')
m = -m
n = -None
root = (Decimal(n) / Decimal(m)).sqrt()
(nr, dr) = root.as_integer_ratio()
plus = root.next_plus()
(np, dp) = plus.as_integer_ratio()
if 4 * n * (dr * dp) ** 2 < m * (dr * np + dp * nr) ** 2:
return plus
minus = None.next_minus()
(nm, dm) = minus.as_integer_ratio()
if 4 * n * (dr * dm) ** 2 < m * (dr * nm + dm * nr) ** 2:
return minus
def mean(data):
    """Return the arithmetic mean of *data*.

    The sum is computed exactly by ``_sum`` and the quotient is converted
    back to the coerced input type by ``_convert``.

    Raises StatisticsError if *data* is empty.
    """
    T, total, n = _sum(data)
    if n < 1:
        raise StatisticsError('mean requires at least one data point')
    return _convert(total / n, T)
def fmean(data, weights=None):
    """Return the arithmetic mean of *data* as a float.

    *data* may be a sequence or an iterator.  If *weights* is given, the
    weighted mean ``sum(d * w) / sum(w)`` is returned instead.

    NOTE(review): the decompiled original retained only ``n = len(data)``;
    the remainder follows the reference ``statistics.fmean`` behaviour and
    should be confirmed against the upstream implementation.

    Raises StatisticsError if *data* is empty, if *data* and *weights*
    differ in length, or if the weights sum to zero.
    """
    try:
        n = len(data)
    except TypeError:
        # Iterators have no len(): count the items as fsum() consumes them.
        n = 0

        def count(iterable):
            nonlocal n
            for n, x in enumerate(iterable, start=1):
                yield x

        data = count(data)
    if weights is None:
        total = fsum(data)
        if not n:
            raise StatisticsError('fmean requires at least one data point')
        return total / n
    try:
        num_weights = len(weights)
    except TypeError:
        weights = list(weights)
        num_weights = len(weights)
    num = fsum(map(mul, data, weights))
    if n != num_weights:
        raise StatisticsError('data and weights must be the same length')
    den = fsum(weights)
    if not den:
        raise StatisticsError('sum of weights must be non-zero')
    return num / den
def geometric_mean(data):
    """Return the geometric mean of *data* as a float.

    Computed as ``exp(fmean(log(x) for x in data))``.

    Raises StatisticsError when the computation raises ValueError (which
    StatisticsError itself subclasses), e.g. for a non-positive value.
    """
    try:
        return exp(fmean(map(log, data)))
    except ValueError:
        raise StatisticsError('geometric mean requires a non-empty dataset containing positive numbers') from None
def harmonic_mean(data, weights=None):
    """Return the (optionally weighted) harmonic mean of *data*.

    The result is ``sum(weights) / sum(w / x)``; with no *weights*, every
    item has weight 1.  A single unweighted real number is returned as is.
    If any data value is zero, the result is 0.

    NOTE(review): the decompiled original stops after building the weighted
    sum; the ZeroDivisionError handling, the positivity check and the final
    conversion follow the reference implementation - confirm upstream.

    Raises StatisticsError for empty data, negative data or weights, a
    weights/data length mismatch, or a non-positive weighted sum.
    Raises TypeError for a single item that is not a real number/Decimal.
    """
    if iter(data) is data:
        # One-shot iterator: materialise it so len() and indexing work.
        data = list(data)
    errmsg = 'harmonic mean does not support negative values'
    n = len(data)
    if n < 1:
        raise StatisticsError('harmonic_mean requires at least one data point')
    if n == 1 and weights is None:
        x = data[0]
        if isinstance(x, (numbers.Real, Decimal)):
            if x < 0:
                raise StatisticsError(errmsg)
            return x
        raise TypeError('unsupported type')
    if weights is None:
        weights = repeat(1, n)
        sum_weights = n
    else:
        if iter(weights) is weights:
            weights = list(weights)
        if len(weights) != n:
            raise StatisticsError('Number of weights does not match data size')
        _, sum_weights, _ = _sum(w for w in _fail_neg(weights, errmsg))
    try:
        data = _fail_neg(data, errmsg)
        T, total, count = _sum(w / x if w else 0 for w, x in zip(weights, data))
    except ZeroDivisionError:
        # A zero data value with a non-zero weight drives the mean to 0.
        return 0
    if total <= 0:
        raise StatisticsError('Weighted sum must be positive')
    return _convert(sum_weights / total, T)
def median(data):
    """Return the median (middle value) of *data*.

    For an odd number of points the middle element of the sorted data is
    returned; for an even number, the mean of the two middle elements.

    Raises StatisticsError if *data* is empty.
    """
    data = sorted(data)
    n = len(data)
    if n == 0:
        raise StatisticsError('no median for empty data')
    if n % 2 == 1:
        return data[n // 2]
    i = n // 2
    return (data[i - 1] + data[i]) / 2
def median_low(data):
    """Return the low median of *data*.

    For an odd number of points the middle element of the sorted data is
    returned; for an even number, the smaller of the two middle elements.

    Raises StatisticsError if *data* is empty.
    """
    data = sorted(data)
    n = len(data)
    if n == 0:
        raise StatisticsError('no median for empty data')
    if n % 2 == 1:
        return data[n // 2]
    return data[n // 2 - 1]
def median_high(data):
    """Return the high median of *data*.

    For an odd number of points the middle element of the sorted data is
    returned; for an even number, the larger of the two middle elements.

    Raises StatisticsError if *data* is empty.
    """
    data = sorted(data)
    n = len(data)
    if n == 0:
        raise StatisticsError('no median for empty data')
    return data[n // 2]
def median_grouped(data, interval=1):
    """Return the median of grouped continuous data as a float.

    Each data value is treated as the midpoint of a class of width
    *interval*; the median is interpolated inside the class that contains
    the middle observation.

    NOTE(review): the decompiled original ends after the float()
    conversions; the interpolation below follows the reference
    implementation - confirm upstream.

    Raises StatisticsError if *data* is empty and TypeError if the midpoint
    or *interval* cannot be converted to float.
    """
    data = sorted(data)
    n = len(data)
    if not n:
        raise StatisticsError('no median for empty data')

    # Value at the midpoint; it stands for the centre of its class.
    x = data[n // 2]

    # All occurrences of x lie in data[i:j].
    i = bisect_left(data, x)
    j = bisect_right(data, x, lo=i)

    try:
        interval = float(interval)
        x = float(x)
    except ValueError:
        raise TypeError('Value cannot be converted to a float')

    lower = x - interval / 2.0  # lower limit of the median class
    cf = i                      # cumulative frequency below the class
    f = j - i                   # frequency of the median class
    return lower + interval * (n / 2 - cf) / f
def mode(data):
    """Return the single most common value in *data*.

    The value is the first entry reported by ``Counter.most_common(1)``.

    Raises StatisticsError if *data* is empty.
    """
    pairs = Counter(iter(data)).most_common(1)
    try:
        return pairs[0][0]
    except IndexError:
        raise StatisticsError('no mode for empty data') from None
def multimode(data):
    """Return a list of the most frequently occurring values in *data*.

    Values are listed in the order they were first encountered.  More than
    one value is returned when several share the highest count; an empty
    list is returned for empty *data*.
    """
    counts = Counter(iter(data))
    if not counts:
        return []
    maxcount = max(counts.values())
    return [value for value, count in counts.items() if count == maxcount]
def quantiles(data, *, n=4, method='exclusive'):
    """Divide *data* into *n* intervals and return the n - 1 cut points.

    *method* selects how the sorted data is indexed:
      * ``'inclusive'`` uses m = len(data) - 1;
      * ``'exclusive'`` uses m = len(data) + 1 and clamps the index so that
        interpolation always uses two existing data points.
    Each cut point is a linear interpolation between two neighbouring data
    points, using exact integer arithmetic for the weights.

    NOTE(review): the defaults ``n=4`` and ``method='exclusive'`` were lost
    in decompilation and are taken from the reference implementation.

    Raises StatisticsError if n < 1 or fewer than two data points are
    given, and ValueError for an unknown *method*.
    """
    if n < 1:
        raise StatisticsError('n must be at least 1')
    data = sorted(data)
    ld = len(data)
    if ld < 2:
        raise StatisticsError('must have at least two data points')
    if method == 'inclusive':
        m = ld - 1
        result = []
        for i in range(1, n):
            j, delta = divmod(i * m, n)
            interpolated = (data[j] * (n - delta) + data[j + 1] * delta) / n
            result.append(interpolated)
        return result
    if method == 'exclusive':
        m = ld + 1
        result = []
        for i in range(1, n):
            j = i * m // n                # rescale i to m/n
            j = min(max(j, 1), ld - 1)    # clamp to 1 .. ld-1
            delta = i * m - j * n         # exact integer remainder
            interpolated = (data[j - 1] * (n - delta) + data[j] * delta) / n
            result.append(interpolated)
        return result
    raise ValueError(f'''Unknown method: {method!r}''')
def variance(data, xbar=None):
    """Return the sample variance of *data*.

    *xbar*, if given, is passed to ``_ss`` as the centre about which the
    squared deviations are taken; otherwise ``_ss`` computes the mean.  The
    sum of squares is divided by ``n - 1`` and converted to the coerced
    input type.

    Raises StatisticsError if fewer than two data points are given.
    """
    T, ss, c, n = _ss(data, xbar)
    if n < 2:
        raise StatisticsError('variance requires at least two data points')
    return _convert(ss / (n - 1), T)
def pvariance(data, mu=None):
    """Return the population variance of *data*.

    *mu*, if given, is passed to ``_ss`` as the centre about which the
    squared deviations are taken; otherwise ``_ss`` computes the mean.  The
    sum of squares is divided by ``n`` and converted to the coerced input
    type.

    Raises StatisticsError if *data* is empty.
    """
    T, ss, c, n = _ss(data, mu)
    if n < 1:
        raise StatisticsError('pvariance requires at least one data point')
    return _convert(ss / n, T)
def stdev(data, xbar=None):
    """Return the sample standard deviation of *data*.

    This is the square root of the sample variance (sum of squares divided
    by ``n - 1``).  Decimal inputs use ``_decimal_sqrt_of_frac``; all other
    types use ``_float_sqrt_of_frac`` and therefore yield a float.

    Raises StatisticsError if fewer than two data points are given.
    """
    T, ss, c, n = _ss(data, xbar)
    if n < 2:
        raise StatisticsError('stdev requires at least two data points')
    mss = ss / (n - 1)
    if issubclass(T, Decimal):
        return _decimal_sqrt_of_frac(mss.numerator, mss.denominator)
    return _float_sqrt_of_frac(mss.numerator, mss.denominator)
def pstdev(data, mu=None):
    """Return the population standard deviation of *data*.

    This is the square root of the population variance (sum of squares
    divided by ``n``).  Decimal inputs use ``_decimal_sqrt_of_frac``; all
    other types use ``_float_sqrt_of_frac`` and therefore yield a float.

    Raises StatisticsError if *data* is empty.
    """
    T, ss, c, n = _ss(data, mu)
    if n < 1:
        raise StatisticsError('pstdev requires at least one data point')
    mss = ss / n
    if issubclass(T, Decimal):
        return _decimal_sqrt_of_frac(mss.numerator, mss.denominator)
    return _float_sqrt_of_frac(mss.numerator, mss.denominator)
def _mean_stdev(data):
    """Return ``(mean, sample standard deviation)`` of *data* as floats.

    Both values come from a single ``_ss`` pass over the data.

    NOTE(review): the AttributeError fallback value was lost in
    decompilation; the expression used here follows the reference
    implementation (it is reached when the mean square has no
    numerator/denominator, presumably for NaN/inf input) - confirm.

    Raises StatisticsError if fewer than two data points are given.
    """
    T, ss, xbar, n = _ss(data)
    if n < 2:
        raise StatisticsError('stdev requires at least two data points')
    mss = ss / (n - 1)
    try:
        return (float(xbar), _float_sqrt_of_frac(mss.numerator, mss.denominator))
    except AttributeError:
        return (float(xbar), float(xbar) / float(ss))
def covariance(x, y):
    """Return the sample covariance of the paired inputs *x* and *y*.

    Computed as ``sum((xi - xbar) * (yi - ybar)) / (n - 1)`` using float
    summation (``fsum``).

    Raises StatisticsError if the inputs differ in length or contain fewer
    than two data points.
    """
    n = len(x)
    if len(y) != n:
        raise StatisticsError('covariance requires that both inputs have same number of data points')
    if n < 2:
        raise StatisticsError('covariance requires at least two data points')
    xbar = fsum(x) / n
    ybar = fsum(y) / n
    sxy = fsum((xi - xbar) * (yi - ybar) for xi, yi in zip(x, y))
    return sxy / (n - 1)
def correlation(x, y):
    """Return Pearson's correlation coefficient of the inputs *x* and *y*.

    Computed as ``sxy / sqrt(sxx * syy)`` where the three terms are the
    float sums of cross- and squared deviations from the means.

    Raises StatisticsError if the inputs differ in length, contain fewer
    than two data points, or if either input is constant (which makes the
    denominator zero).
    """
    n = len(x)
    if len(y) != n:
        raise StatisticsError('correlation requires that both inputs have same number of data points')
    if n < 2:
        raise StatisticsError('correlation requires at least two data points')
    xbar = fsum(x) / n
    ybar = fsum(y) / n
    sxy = fsum((xi - xbar) * (yi - ybar) for xi, yi in zip(x, y))
    sxx = fsum((d := xi - xbar) * d for xi in x)
    syy = fsum((d := yi - ybar) * d for yi in y)
    try:
        return sxy / sqrt(sxx * syy)
    except ZeroDivisionError:
        raise StatisticsError('at least one of the inputs is constant')
# Result type of linear_regression(): the fitted line y = slope * x + intercept.
LinearRegression = namedtuple('LinearRegression', ('slope', 'intercept'))


def linear_regression(x, y, *, proportional=False):
    """Fit a straight line to the paired inputs by ordinary least squares.

    Returns ``LinearRegression(slope, intercept)``.  When *proportional* is
    true, the line is forced through the origin: the raw (uncentred) sums
    are used for the slope and the intercept is 0.0.

    NOTE(review): the decompiled original ends at ``slope = sxy / sxx``;
    the ZeroDivisionError handling, the intercept and the return value
    follow the reference implementation - confirm upstream.

    Raises StatisticsError if the inputs differ in length, contain fewer
    than two data points, or if *x* is constant.
    """
    n = len(x)
    if len(y) != n:
        raise StatisticsError('linear regression requires that both inputs have same number of data points')
    if n < 2:
        raise StatisticsError('linear regression requires at least two data points')
    if proportional:
        sxy = fsum(xi * yi for xi, yi in zip(x, y))
        sxx = fsum(xi * xi for xi in x)
    else:
        xbar = fsum(x) / n
        ybar = fsum(y) / n
        sxy = fsum((xi - xbar) * (yi - ybar) for xi, yi in zip(x, y))
        sxx = fsum((d := xi - xbar) * d for xi in x)
    try:
        slope = sxy / sxx
    except ZeroDivisionError:
        raise StatisticsError('x is constant')
    intercept = 0.0 if proportional else ybar - slope * xbar
    return LinearRegression(slope=slope, intercept=intercept)
def _normal_dist_inv_cdf(p, mu, sigma):
q = p - 0.5
if fabs(q) < 0.425:
r = 0.180625 - q * q
num = (((((((2509.08 * r + 33430.6) * r + 67265.8) * r + 45922) * r + 13731.7) * r + 1971.59) * r + 133.142) * r + 3.38713) * q
den = ((((((5226.5 * r + 28729.1) * r + 39307.9) * r + 21213.8) * r + 5394.2) * r + 687.187) * r + 42.3133) * r + 1
x = num / den
return mu + x * sigma
r = p if None < 0 else 1 - p
r = sqrt(-log(r))
if r < 5:
r = r - 1.6
num = ((((((0.000774545 * r + 0.0227238) * r + 0.241781) * r + 1.27046) * r + 3.64785) * r + 5.7695) * r + 4.63034) * r + 1.42344
den = ((((((1.05075e-09 * r + 0.000547594) * r + 0.0151987) * r + 0.148104) * r + 0.689767) * r + 1.67638) * r + 2.05319) * r + 1
else:
r = r - 5
num = ((((((2.01033e-07 * r + 2.71156e-05) * r + 0.00124266) * r + 0.0265322) * r + 0.296561) * r + 1.78483) * r + 5.46378) * r + 6.6579
den = ((((((2.04426e-15 * r + 1.42151e-07) * r + 1.84632e-05) * r + 0.000786869) * r + 0.0148754) * r + 0.13693) * r + 0.599832) * r + 1
x = num / den
if q < 0:
x = -x
return mu + x * sigma
from _statistics import _normal_dist_inv_cdf