blob: 99b0fb14c641e9c14b40ddcd4c421e1a99ae0379 [file] [log] [blame]
# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import bisect
import collections
class _Bucketer(object):
"""Bucketing function for histograms recorded by the Distribution class."""
def __init__(self, width, growth_factor, num_finite_buckets, scale=1.0):
"""The bucket sizes are controlled by width and growth_factor, and the total
number of buckets is set by num_finite_buckets:
Args:
width: fixed size of each bucket (ignores |scale|).
growth_factor: if non-zero, the size of each bucket increases by another
multiplicative factor of this factor (see lower bound formula below).
num_finite_buckets: the number of finite buckets. There are two
additional buckets - an underflow and an overflow bucket - that have
lower and upper bounds of Infinity.
scale: overall scale factor to apply to buckets, if using geometric
buckets.
Specify a width for fixed-size buckets or specify a growth_factor for bucket
sizes that follow a geometric progression. Specifying both is not valid.
For fixed-size buckets::
The i'th bucket covers the interval [(i-1) * width, i * width), where i
ranges from 1 to num_finite_buckets, inclusive:
bucket number lower bound upper bound
i == 0 (underflow) -inf 0
1 <= i <= num_buckets (i-1) * width i * width
i == num_buckets+1 (overflow) (i-1) * width +inf
For geometric buckets::
The i'th bucket covers the interval [factor^(i-1), factor^i) * scale
where i ranges from 1 to num_finite_buckets inclusive.
bucket number lower bound upper bound
i == 0 (underflow) -inf scale
1 <= i <= num_buckets factor^(i-1) * scale factor^i * scale
i == num_buckets+1 (overflow) factor^(i-1) * scale +inf
"""
if num_finite_buckets < 0:
raise ValueError('num_finite_buckets must be >= 0 (was %d)' %
num_finite_buckets)
if width != 0 and growth_factor != 0:
raise ValueError('a Bucketer must be created with either a width or a '
'growth factor, not both')
self.width = width
self.growth_factor = growth_factor
self.num_finite_buckets = num_finite_buckets
self.total_buckets = num_finite_buckets + 2
self.underflow_bucket = 0
self.overflow_bucket = self.total_buckets - 1
self.scale = scale
if width != 0:
self._lower_bounds = [float('-Inf')] + self._linear_bounds()
else:
self._lower_bounds = [float('-Inf')] + self._exponential_bounds()
# Sanity check the bucket lower bounds we created.
assert len(self._lower_bounds) == self.total_buckets
assert all(x < y for x, y in zip(
self._lower_bounds, self._lower_bounds[1:])), (
'bucket boundaries must be monotonically increasing')
def __eq__(self, other):
return (type(self) is type(other) and
self.width == other.width and
self.growth_factor == other.growth_factor and
self.num_finite_buckets == other.num_finite_buckets and
self.scale == other.scale)
def _linear_bounds(self):
return [self.width * i for i in range(self.num_finite_buckets + 1)]
def _exponential_bounds(self):
return [
self.scale * self.growth_factor ** i
for i in range(self.num_finite_buckets + 1)]
def bucket_for_value(self, value):
"""Returns the index of the bucket that this value belongs to."""
# bisect.bisect_left is wrong because the buckets are of [lower, upper) form
return bisect.bisect(self._lower_bounds, value) - 1
def bucket_boundaries(self, bucket):
"""Returns a tuple that is the [lower, upper) bounds of this bucket.
The lower bound of the first bucket is -Infinity, and the upper bound of the
last bucket is +Infinity.
"""
if bucket < 0 or bucket >= self.total_buckets:
raise IndexError('bucket %d out of range' % bucket)
if bucket == self.total_buckets - 1:
return (self._lower_bounds[bucket], float('Inf'))
return (self._lower_bounds[bucket], self._lower_bounds[bucket + 1])
def FixedWidthBucketer(width, num_finite_buckets=100):
"""Convenience function that returns a fixed width Bucketer."""
return _Bucketer(width=width, growth_factor=0.0,
num_finite_buckets=num_finite_buckets)
def GeometricBucketer(growth_factor=10**0.2, num_finite_buckets=100,
scale=1.0):
"""Convenience function that returns a geometric progression Bucketer."""
return _Bucketer(width=0, growth_factor=growth_factor,
num_finite_buckets=num_finite_buckets, scale=scale)
class Distribution(object):
"""Holds a histogram distribution.
Buckets are chosen for values by the provided Bucketer.
"""
def __init__(self, bucketer):
self.bucketer = bucketer
self.sum = 0
self.count = 0
self.buckets = collections.defaultdict(int)
def add(self, value):
self.buckets[self.bucketer.bucket_for_value(value)] += 1
self.sum += value
self.count += 1