Python code coverage for Lib/statistics.py

#	count	content
1	n/a	"""
2	n/a	Basic statistics module.
3	n/a
4	n/a	This module provides functions for calculating statistics of data, including
5	n/a	averages, variance, and standard deviation.
6	n/a
7	n/a	Calculating averages
8	n/a	--------------------
9	n/a
10	n/a	================== =============================================
11	n/a	Function Description
12	n/a	================== =============================================
13	n/a	mean Arithmetic mean (average) of data.
14	n/a	harmonic_mean Harmonic mean of data.
15	n/a	median Median (middle value) of data.
16	n/a	median_low Low median of data.
17	n/a	median_high High median of data.
18	n/a	median_grouped Median, or 50th percentile, of grouped data.
19	n/a	mode Mode (most common value) of data.
20	n/a	================== =============================================
21	n/a
22	n/a	Calculate the arithmetic mean ("the average") of data:
23	n/a
24	n/a	>>> mean([-1.0, 2.5, 3.25, 5.75])
25	n/a	2.625
26	n/a
27	n/a
28	n/a	Calculate the standard median of discrete data:
29	n/a
30	n/a	>>> median([2, 3, 4, 5])
31	n/a	3.5
32	n/a
33	n/a
34	n/a	Calculate the median, or 50th percentile, of data grouped into class intervals
35	n/a	centred on the data values provided. E.g. if your data points are rounded to
36	n/a	the nearest whole number:
37	n/a
38	n/a	>>> median_grouped([2, 2, 3, 3, 3, 4]) #doctest: +ELLIPSIS
39	n/a	2.8333333333...
40	n/a
41	n/a	This should be interpreted in this way: you have two data points in the class
42	n/a	interval 1.5-2.5, three data points in the class interval 2.5-3.5, and one in
43	n/a	the class interval 3.5-4.5. The median of these data points is 2.8333...
44	n/a
45	n/a
46	n/a	Calculating variability or spread
47	n/a	---------------------------------
48	n/a
49	n/a	================== =============================================
50	n/a	Function Description
51	n/a	================== =============================================
52	n/a	pvariance Population variance of data.
53	n/a	variance Sample variance of data.
54	n/a	pstdev Population standard deviation of data.
55	n/a	stdev Sample standard deviation of data.
56	n/a	================== =============================================
57	n/a
58	n/a	Calculate the standard deviation of sample data:
59	n/a
60	n/a	>>> stdev([2.5, 3.25, 5.5, 11.25, 11.75]) #doctest: +ELLIPSIS
61	n/a	4.38961843444...
62	n/a
63	n/a	If you have previously calculated the mean, you can pass it as the optional
64	n/a	second argument to the four "spread" functions to avoid recalculating it:
65	n/a
66	n/a	>>> data = [1, 2, 2, 4, 4, 4, 5, 6]
67	n/a	>>> mu = mean(data)
68	n/a	>>> pvariance(data, mu)
69	n/a	2.5
70	n/a
71	n/a
72	n/a	Exceptions
73	n/a	----------
74	n/a
75	n/a	A single exception is defined: StatisticsError is a subclass of ValueError.
76	n/a
77	n/a	"""
78	n/a
# Names exported by a star-import of this module.
__all__ = [
    'StatisticsError',
    'pstdev',
    'pvariance',
    'stdev',
    'variance',
    'median',
    'median_low',
    'median_high',
    'median_grouped',
    'mean',
    'mode',
    'harmonic_mean',
]
84	n/a
85	n/a	import collections
86	n/a	import decimal
87	n/a	import math
88	n/a	import numbers
89	n/a
90	n/a	from fractions import Fraction
91	n/a	from decimal import Decimal
92	n/a	from itertools import groupby, chain
93	n/a	from bisect import bisect_left, bisect_right
94	n/a
95	n/a
96	n/a
97	n/a	# === Exceptions ===
98	n/a
class StatisticsError(ValueError):
    """Raised by the functions in this module for statistics-related errors.

    It derives from ValueError, so ``except ValueError`` also catches it.
    """
101	n/a
102	n/a
103	n/a	# === Private utilities ===
104	n/a
def _sum(data, start=0):
    """_sum(data [, start]) -> (type, sum, count)

    Add up ``data`` exactly and return a 3-tuple holding the numeric type
    the result should later be converted to, the total, and the number of
    items taken from ``data``.

    ``start`` (default 0) contributes to the total but not to the count.
    For finite input the total is a Fraction; if any value is a NAN or INF,
    the total is the sum of those non-finite values instead.

    Examples
    --------

    >>> _sum([3, 2.25, 4.5, -0.5, 1.0], 0.75)
    (<class 'float'>, Fraction(11, 1), 5)

    Round-off that defeats the builtin sum is avoided:

    # Built-in sum returns zero.
    >>> _sum([1e50, 1, -1e50] * 1000)
    (<class 'float'>, Fraction(1000, 1), 3000)

    Fractions and Decimals work too:

    >>> from fractions import Fraction as F
    >>> _sum([F(2, 3), F(7, 5), F(1, 4), F(5, 6)])
    (<class 'fractions.Fraction'>, Fraction(63, 20), 4)

    >>> from decimal import Decimal as D
    >>> data = [D("0.1375"), D("0.2108"), D("0.3061"), D("0.0419")]
    >>> _sum(data)
    (<class 'decimal.Decimal'>, Fraction(6963, 10000), 4)

    Mixing types is an error (TypeError from ``_coerce``), except that int
    may be mixed with anything.
    """
    num, den = _exact_ratio(start)
    # Running numerator totals, keyed by denominator.  The key None collects
    # NAN/INF values, which have no exact ratio.
    by_denominator = {den: num}
    lookup = by_denominator.get
    result_type = _coerce(int, type(start))
    seen = 0
    # Group consecutive items of the same type so coercion is checked once
    # per run rather than once per item.
    for kind, run in groupby(data, type):
        result_type = _coerce(result_type, kind)  # or raise TypeError
        for value in run:
            num, den = _exact_ratio(value)
            seen += 1
            by_denominator[den] = lookup(den, 0) + num
    if None not in by_denominator:
        # Sum all the partial sums using builtin sum.
        # FIXME is this faster if we sum them in order of the denominator?
        total = sum(
            Fraction(num, den) for den, num in sorted(by_denominator.items())
        )
    else:
        # The result is a NAN or INF, so the finite partial sums cannot
        # affect it; only the special entry matters.
        total = by_denominator[None]
        assert not _isfinite(total)
    return (result_type, total, seen)
161	n/a
162	n/a
163	n/a	def _isfinite(x):
164	n/a	try:
165	n/a	return x.is_finite() # Likely a Decimal.
166	n/a	except AttributeError:
167	n/a	return math.isfinite(x) # Coerces to float first.
168	n/a
169	n/a
170	n/a	def _coerce(T, S):
171	n/a	"""Coerce types T and S to a common type, or raise TypeError.
172	n/a
173	n/a	Coercion rules are currently an implementation detail. See the CoerceTest
174	n/a	test class in test_statistics for details.
175	n/a	"""
176	n/a	# See http://bugs.python.org/issue24068.
177	n/a	assert T is not bool, "initial type T is bool"
178	n/a	# If the types are the same, no need to coerce anything. Put this
179	n/a	# first, so that the usual case (no coercion needed) happens as soon
180	n/a	# as possible.
181	n/a	if T is S: return T
182	n/a	# Mixed int & other coerce to the other type.
183	n/a	if S is int or S is bool: return T
184	n/a	if T is int: return S
185	n/a	# If one is a (strict) subclass of the other, coerce to the subclass.
186	n/a	if issubclass(S, T): return S
187	n/a	if issubclass(T, S): return T
188	n/a	# Ints coerce to the other type.
189	n/a	if issubclass(T, int): return S
190	n/a	if issubclass(S, int): return T
191	n/a	# Mixed fraction & float coerces to float (or float subclass).
192	n/a	if issubclass(T, Fraction) and issubclass(S, float):
193	n/a	return S
194	n/a	if issubclass(T, float) and issubclass(S, Fraction):
195	n/a	return T
196	n/a	# Any other combination is disallowed.
197	n/a	msg = "don't know how to coerce %s and %s"
198	n/a	raise TypeError(msg % (T.__name__, S.__name__))
199	n/a
200	n/a
201	n/a	def _exact_ratio(x):
202	n/a	"""Return Real number x to exact (numerator, denominator) pair.
203	n/a
204	n/a	>>> _exact_ratio(0.25)
205	n/a	(1, 4)
206	n/a
207	n/a	x is expected to be an int, Fraction, Decimal or float.
208	n/a	"""
209	n/a	try:
210	n/a	# Optimise the common case of floats. We expect that the most often
211	n/a	# used numeric type will be builtin floats, so try to make this as
212	n/a	# fast as possible.
213	n/a	if type(x) is float or type(x) is Decimal:
214	n/a	return x.as_integer_ratio()
215	n/a	try:
216	n/a	# x may be an int, Fraction, or Integral ABC.
217	n/a	return (x.numerator, x.denominator)
218	n/a	except AttributeError:
219	n/a	try:
220	n/a	# x may be a float or Decimal subclass.
221	n/a	return x.as_integer_ratio()
222	n/a	except AttributeError:
223	n/a	# Just give up?
224	n/a	pass
225	n/a	except (OverflowError, ValueError):
226	n/a	# float NAN or INF.
227	n/a	assert not _isfinite(x)
228	n/a	return (x, None)
229	n/a	msg = "can't convert type '{}' to numerator/denominator"
230	n/a	raise TypeError(msg.format(type(x).__name__))
231	n/a
232	n/a
233	n/a	def _convert(value, T):
234	n/a	"""Convert value to given numeric type T."""
235	n/a	if type(value) is T:
236	n/a	# This covers the cases where T is Fraction, or where value is
237	n/a	# a NAN or INF (Decimal or float).
238	n/a	return value
239	n/a	if issubclass(T, int) and value.denominator != 1:
240	n/a	T = float
241	n/a	try:
242	n/a	# FIXME: what do we do if this overflows?
243	n/a	return T(value)
244	n/a	except TypeError:
245	n/a	if issubclass(T, Decimal):
246	n/a	return T(value.numerator)/T(value.denominator)
247	n/a	else:
248	n/a	raise
249	n/a
250	n/a
251	n/a	def _counts(data):
252	n/a	# Generate a table of sorted (value, frequency) pairs.
253	n/a	table = collections.Counter(iter(data)).most_common()
254	n/a	if not table:
255	n/a	return table
256	n/a	# Extract the values with the highest frequency.
257	n/a	maxfreq = table[0][1]
258	n/a	for i in range(1, len(table)):
259	n/a	if table[i][1] != maxfreq:
260	n/a	table = table[:i]
261	n/a	break
262	n/a	return table
263	n/a
264	n/a
265	n/a	def _find_lteq(a, x):
266	n/a	'Locate the leftmost value exactly equal to x'
267	n/a	i = bisect_left(a, x)
268	n/a	if i != len(a) and a[i] == x:
269	n/a	return i
270	n/a	raise ValueError
271	n/a
272	n/a
273	n/a	def _find_rteq(a, l, x):
274	n/a	'Locate the rightmost value exactly equal to x'
275	n/a	i = bisect_right(a, x, lo=l)
276	n/a	if i != (len(a)+1) and a[i-1] == x:
277	n/a	return i-1
278	n/a	raise ValueError
279	n/a
280	n/a
281	n/a	def _fail_neg(values, errmsg='negative value'):
282	n/a	"""Iterate over values, failing if any are less than zero."""
283	n/a	for x in values:
284	n/a	if x < 0:
285	n/a	raise StatisticsError(errmsg)
286	n/a	yield x
287	n/a
288	n/a
289	n/a	# === Measures of central tendency (averages) ===
290	n/a
def mean(data):
    """Return the sample arithmetic mean of data.

    >>> mean([1, 2, 3, 4, 4])
    2.8

    >>> from fractions import Fraction as F
    >>> mean([F(3, 7), F(1, 21), F(5, 3), F(1, 3)])
    Fraction(13, 21)

    >>> from decimal import Decimal as D
    >>> mean([D("0.5"), D("0.75"), D("0.625"), D("0.375")])
    Decimal('0.5625')

    StatisticsError is raised when ``data`` is empty.
    """
    # An iterator has no len(), so materialise it first.
    if iter(data) is data:
        data = list(data)
    size = len(data)
    if size < 1:
        raise StatisticsError('mean requires at least one data point')
    T, total, counted = _sum(data)
    assert counted == size
    return _convert(total/size, T)
315	n/a
316	n/a
def harmonic_mean(data):
    """Return the harmonic mean of data.

    The harmonic mean (also known as the subcontrary mean) is the
    reciprocal of the arithmetic mean of the reciprocals of the data.  It
    is often the right average for rates and ratios, such as speeds.

    Example: an investor buys an equal value of shares in each of three
    companies whose P/E (price/earning) ratios are 2.5, 3 and 10.  What is
    the average P/E ratio of the portfolio?

    >>> harmonic_mean([2.5, 3, 10])  # For an equal investment portfolio.
    3.6

    The arithmetic mean would give about 5.167, which is too high.

    ``harmonic_mean`` raises ``StatisticsError`` if ``data`` is empty or
    if any element is less than zero.  If the data contain a zero, the
    result is 0.
    """
    # For a justification for using harmonic mean for P/E ratios, see
    # http://fixthepitch.pellucid.com/comps-analysis-the-missing-harmony-of-summary-statistics/
    # http://papers.ssrn.com/sol3/papers.cfm?abstract_id=2621087
    if iter(data) is data:
        data = list(data)
    negative_msg = 'harmonic mean does not support negative values'
    size = len(data)
    if size < 1:
        raise StatisticsError('harmonic_mean requires at least one data point')
    if size == 1:
        # A single value is its own harmonic mean; just validate it.
        only = data[0]
        if not isinstance(only, (numbers.Real, Decimal)):
            raise TypeError('unsupported type')
        if only < 0:
            raise StatisticsError(negative_msg)
        return only
    try:
        T, total, counted = _sum(1/x for x in _fail_neg(data, negative_msg))
    except ZeroDivisionError:
        # A zero among the data makes the harmonic mean zero.
        return 0
    assert counted == size
    return _convert(size/total, T)
361	n/a
362	n/a
363	n/a	# FIXME: investigate ways to calculate medians without sorting? Quickselect?
def median(data):
    """Return the median (middle value) of numeric data.

    With an odd number of data points the middle one is returned.  With an
    even number, the result is the average of the two middle values:

    >>> median([1, 3, 5])
    3
    >>> median([1, 3, 5, 7])
    4.0

    StatisticsError is raised when ``data`` is empty.
    """
    ordered = sorted(data)
    size = len(ordered)
    if size == 0:
        raise StatisticsError("no median for empty data")
    half, is_odd = divmod(size, 2)
    if is_odd:
        return ordered[half]
    return (ordered[half - 1] + ordered[half])/2
386	n/a
387	n/a
def median_low(data):
    """Return the low median of numeric data.

    With an odd number of data points the middle value is returned.  With
    an even number, the smaller of the two middle values is returned.

    >>> median_low([1, 3, 5])
    3
    >>> median_low([1, 3, 5, 7])
    3

    StatisticsError is raised when ``data`` is empty.
    """
    ordered = sorted(data)
    size = len(ordered)
    if size == 0:
        raise StatisticsError("no median for empty data")
    # (size-1)//2 is the middle index for odd sizes and the lower of the
    # two middle indexes for even sizes.
    return ordered[(size - 1)//2]
408	n/a
409	n/a
def median_high(data):
    """Return the high median of data.

    With an odd number of data points the middle value is returned.  With
    an even number, the larger of the two middle values is returned.

    >>> median_high([1, 3, 5])
    3
    >>> median_high([1, 3, 5, 7])
    5

    StatisticsError is raised when ``data`` is empty.
    """
    ordered = sorted(data)
    if not ordered:
        raise StatisticsError("no median for empty data")
    middle = len(ordered)//2
    return ordered[middle]
427	n/a
428	n/a
def median_grouped(data, interval=1):
    """Return the 50th percentile (median) of grouped continuous data.

    >>> median_grouped([1, 2, 2, 3, 4, 4, 4, 4, 4, 5])
    3.7
    >>> median_grouped([52, 52, 53, 54])
    52.5

    The median is computed as the 50th percentile, which is appropriate
    for continuous data that has been grouped.  In the first example the
    values 1, 2, 3, etc. stand for the midpoints of the classes 0.5-1.5,
    1.5-2.5, 2.5-3.5, etc.  The middle value lies somewhere in the class
    3.5-4.5 and is estimated by interpolation.

    The optional argument ``interval`` is the class interval (default 1).
    A different class interval gives a different interpolated value:

    >>> median_grouped([1, 3, 3, 5, 7], interval=1)
    3.25
    >>> median_grouped([1, 3, 3, 5, 7], interval=2)
    3.5

    No check is made that the data points are at least ``interval`` apart.

    StatisticsError is raised for empty data; TypeError is raised if the
    middle value or ``interval`` is a str or bytes object.
    """
    ordered = sorted(data)
    n = len(ordered)
    if n == 0:
        raise StatisticsError("no median for empty data")
    if n == 1:
        return ordered[0]
    # The value at the midpoint; it represents the centre of its class
    # interval.
    x = ordered[n//2]
    for obj in (x, interval):
        if isinstance(obj, (str, bytes)):
            raise TypeError('expected number but got %r' % obj)
    try:
        lower = x - interval/2  # The lower limit of the median interval.
    except TypeError:
        # Mixed type. For now we just coerce to float.
        lower = float(x) - float(interval)/2

    # Bisection finds the run of values equal to x in O(log n) time.
    first = _find_lteq(ordered, x)
    # Rightmost occurrence, searched from ``first`` onwards, so that
    # first <= last always holds.
    last = _find_rteq(ordered, first, x)
    below = first                # Number of values below the median class.
    in_class = last - first + 1  # Number of values in the median class.
    return lower + interval*(n/2 - below)/in_class
482	n/a
483	n/a
def mode(data):
    """Return the most common data point from discrete or nominal data.

    ``mode`` assumes discrete data and returns a single value, which is
    the standard treatment of the mode as commonly taught in schools:

    >>> mode([1, 1, 2, 3, 3, 3, 3, 4])
    3

    Nominal (non-numeric) data works as well:

    >>> mode(["red", "blue", "blue", "red", "green", "red", "red"])
    'red'

    StatisticsError is raised unless there is exactly one most common
    value; this includes empty data.
    """
    # (value, frequency) pairs tied for the highest frequency.
    leaders = _counts(data)
    if not leaders:
        raise StatisticsError('no mode for empty data')
    if len(leaders) != 1:
        raise StatisticsError(
            'no unique mode; found %d equally common values' % len(leaders)
        )
    return leaders[0][0]
511	n/a
512	n/a
513	n/a	# === Measures of spread ===
514	n/a
515	n/a	# See http://mathworld.wolfram.com/Variance.html
516	n/a	# http://mathworld.wolfram.com/SampleVariance.html
517	n/a	# http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
518	n/a	#
519	n/a	# Under no circumstances use the so-called "computational formula for
520	n/a	# variance", as that is only suitable for hand calculations with a small
521	n/a	# amount of low-precision data. It has terrible numeric properties.
522	n/a	#
523	n/a	# See a comparison of three computational methods here:
524	n/a	# http://www.johndcook.com/blog/2008/09/26/comparing-three-methods-of-computing-standard-deviation/
525	n/a
def _ss(data, c=None):
    """Return the pair (type, sum of square deviations) for sequence data.

    When ``c`` is None the mean is computed in a first pass and deviations
    from it are summed in a second pass.  Otherwise deviations are taken
    from ``c`` as given; use that form with care, since a poor choice of
    ``c`` can lead to garbage results.
    """
    centre = mean(data) if c is None else c
    T, sq_total, sq_count = _sum((x - centre)**2 for x in data)
    # The deviations themselves should mathematically sum to zero, but
    # rounding error may leave a residue; subtract its contribution.
    U, dev_total, dev_count = _sum((x - centre) for x in data)
    assert T == U and sq_count == dev_count
    corrected = sq_total - dev_total**2/len(data)
    assert not corrected < 0, 'negative sum of square deviations: %f' % corrected
    return (T, corrected)
544	n/a
545	n/a
def variance(data, xbar=None):
    """Return the sample variance of data.

    data should be an iterable of Real-valued numbers containing at least
    two values.  The optional argument xbar, if given, should be the mean
    of the data; when it is missing or None the mean is calculated
    automatically.

    Use this function when the data is a sample from a population.  For
    the variance of an entire population, see ``pvariance``.

    Examples:

    >>> data = [2.75, 1.75, 1.25, 0.25, 0.5, 1.25, 3.5]
    >>> variance(data)
    1.3720238095238095

    A mean that has already been calculated can be passed as the optional
    second argument ``xbar`` so that it is not recalculated:

    >>> m = mean(data)
    >>> variance(data, m)
    1.3720238095238095

    No check is made that ``xbar`` really is the mean of ``data``.
    Arbitrary values for ``xbar`` may give invalid or impossible results.

    Decimals and Fractions are supported:

    >>> from decimal import Decimal as D
    >>> variance([D("27.5"), D("30.25"), D("30.25"), D("34.5"), D("41.75")])
    Decimal('31.01875')

    >>> from fractions import Fraction as F
    >>> variance([F(1, 6), F(1, 2), F(5, 3)])
    Fraction(67, 108)

    StatisticsError is raised when there are fewer than two data points.
    """
    # An iterator has no len() and can only be traversed once.
    if iter(data) is data:
        data = list(data)
    size = len(data)
    if size < 2:
        raise StatisticsError('variance requires at least two data points')
    T, squares = _ss(data, xbar)
    # Dividing by size-1 rather than size gives the sample variance.
    return _convert(squares/(size - 1), T)
591	n/a
592	n/a
def pvariance(data, mu=None):
    """Return the population variance of ``data``.

    data should be an iterable of Real-valued numbers containing at least
    one value.  The optional argument mu, if given, should be the mean of
    the data; when it is missing or None the mean is calculated
    automatically.

    Use this function for the variance of an entire population.  To
    estimate the variance from a sample, ``variance`` is usually the
    better choice.

    Examples:

    >>> data = [0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25]
    >>> pvariance(data)
    1.25

    A mean that has already been calculated can be passed as the optional
    second argument so that it is not recalculated:

    >>> mu = mean(data)
    >>> pvariance(data, mu)
    1.25

    No check is made that ``mu`` really is the mean of ``data``.
    Arbitrary values for ``mu`` may give invalid or impossible results.

    Decimals and Fractions are supported:

    >>> from decimal import Decimal as D
    >>> pvariance([D("27.5"), D("30.25"), D("30.25"), D("34.5"), D("41.75")])
    Decimal('24.815')

    >>> from fractions import Fraction as F
    >>> pvariance([F(1, 4), F(5, 4), F(1, 2)])
    Fraction(13, 72)

    StatisticsError is raised when ``data`` is empty.
    """
    # An iterator has no len() and can only be traversed once.
    if iter(data) is data:
        data = list(data)
    size = len(data)
    if size < 1:
        raise StatisticsError('pvariance requires at least one data point')
    T, squares = _ss(data, mu)
    return _convert(squares/size, T)
639	n/a
640	n/a
def stdev(data, xbar=None):
    """Return the square root of the sample variance.

    See ``variance`` for arguments and other details.

    >>> stdev([1.5, 2.5, 2.5, 2.75, 3.25, 4.75])
    1.0810874155219827

    """
    sample_var = variance(data, xbar)
    try:
        # Prefer the value's own sqrt() method when it has one.
        root = sample_var.sqrt()
    except AttributeError:
        root = math.sqrt(sample_var)
    return root
655	n/a
656	n/a
def pstdev(data, mu=None):
    """Return the square root of the population variance.

    See ``pvariance`` for arguments and other details.

    >>> pstdev([1.5, 2.5, 2.5, 2.75, 3.25, 4.75])
    0.986893273527251

    """
    population_var = pvariance(data, mu)
    try:
        # Prefer the value's own sqrt() method when it has one.
        root = population_var.sqrt()
    except AttributeError:
        root = math.sqrt(population_var)
    return root
668	n/a	return var.sqrt()
669	n/a	except AttributeError:
670	n/a	return math.sqrt(var)