| # Copyright 2015 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| import bisect |
| import collections |
| |
| |
| class _Bucketer(object): |
| """Bucketing function for histograms recorded by the Distribution class.""" |
| |
| def __init__(self, width, growth_factor, num_finite_buckets, scale=1.0): |
| """The bucket sizes are controlled by width and growth_factor, and the total |
| number of buckets is set by num_finite_buckets: |
| |
| Args: |
| width: fixed size of each bucket (ignores |scale|). |
| growth_factor: if non-zero, the size of each bucket increases by another |
| multiplicative factor of this factor (see lower bound formula below). |
| num_finite_buckets: the number of finite buckets. There are two |
| additional buckets - an underflow and an overflow bucket - that have |
| lower and upper bounds of Infinity. |
| scale: overall scale factor to apply to buckets, if using geometric |
| buckets. |
| |
| Specify a width for fixed-size buckets or specify a growth_factor for bucket |
| sizes that follow a geometric progression. Specifying both is not valid. |
| |
| For fixed-size buckets:: |
| |
| The i'th bucket covers the interval [(i-1) * width, i * width), where i |
| ranges from 1 to num_finite_buckets, inclusive: |
| |
| bucket number lower bound upper bound |
| i == 0 (underflow) -inf 0 |
| 1 <= i <= num_buckets (i-1) * width i * width |
| i == num_buckets+1 (overflow) (i-1) * width +inf |
| |
| For geometric buckets:: |
| |
| The i'th bucket covers the interval [factor^(i-1), factor^i) * scale |
| where i ranges from 1 to num_finite_buckets inclusive. |
| |
| bucket number lower bound upper bound |
| i == 0 (underflow) -inf scale |
| 1 <= i <= num_buckets factor^(i-1) * scale factor^i * scale |
| i == num_buckets+1 (overflow) factor^(i-1) * scale +inf |
| """ |
| |
| if num_finite_buckets < 0: |
| raise ValueError('num_finite_buckets must be >= 0 (was %d)' % |
| num_finite_buckets) |
| if width != 0 and growth_factor != 0: |
| raise ValueError('a Bucketer must be created with either a width or a ' |
| 'growth factor, not both') |
| |
| self.width = width |
| self.growth_factor = growth_factor |
| self.num_finite_buckets = num_finite_buckets |
| self.total_buckets = num_finite_buckets + 2 |
| self.underflow_bucket = 0 |
| self.overflow_bucket = self.total_buckets - 1 |
| self.scale = scale |
| |
| if width != 0: |
| self._lower_bounds = [float('-Inf')] + self._linear_bounds() |
| else: |
| self._lower_bounds = [float('-Inf')] + self._exponential_bounds() |
| |
| # Sanity check the bucket lower bounds we created. |
| assert len(self._lower_bounds) == self.total_buckets |
| assert all(x < y for x, y in zip( |
| self._lower_bounds, self._lower_bounds[1:])), ( |
| 'bucket boundaries must be monotonically increasing') |
| |
| def __eq__(self, other): |
| return (type(self) is type(other) and |
| self.width == other.width and |
| self.growth_factor == other.growth_factor and |
| self.num_finite_buckets == other.num_finite_buckets and |
| self.scale == other.scale) |
| |
| def _linear_bounds(self): |
| return [self.width * i for i in range(self.num_finite_buckets + 1)] |
| |
| def _exponential_bounds(self): |
| return [ |
| self.scale * self.growth_factor ** i |
| for i in range(self.num_finite_buckets + 1)] |
| |
| def bucket_for_value(self, value): |
| """Returns the index of the bucket that this value belongs to.""" |
| |
| # bisect.bisect_left is wrong because the buckets are of [lower, upper) form |
| return bisect.bisect(self._lower_bounds, value) - 1 |
| |
| def bucket_boundaries(self, bucket): |
| """Returns a tuple that is the [lower, upper) bounds of this bucket. |
| |
| The lower bound of the first bucket is -Infinity, and the upper bound of the |
| last bucket is +Infinity. |
| """ |
| |
| if bucket < 0 or bucket >= self.total_buckets: |
| raise IndexError('bucket %d out of range' % bucket) |
| if bucket == self.total_buckets - 1: |
| return (self._lower_bounds[bucket], float('Inf')) |
| return (self._lower_bounds[bucket], self._lower_bounds[bucket + 1]) |
| |
| |
| def FixedWidthBucketer(width, num_finite_buckets=100): |
| """Convenience function that returns a fixed width Bucketer.""" |
| return _Bucketer(width=width, growth_factor=0.0, |
| num_finite_buckets=num_finite_buckets) |
| |
| |
| def GeometricBucketer(growth_factor=10**0.2, num_finite_buckets=100, |
| scale=1.0): |
| """Convenience function that returns a geometric progression Bucketer.""" |
| return _Bucketer(width=0, growth_factor=growth_factor, |
| num_finite_buckets=num_finite_buckets, scale=scale) |
| |
| |
| class Distribution(object): |
| """Holds a histogram distribution. |
| |
| Buckets are chosen for values by the provided Bucketer. |
| """ |
| |
| def __init__(self, bucketer): |
| self.bucketer = bucketer |
| self.sum = 0 |
| self.count = 0 |
| self.buckets = collections.defaultdict(int) |
| |
| def add(self, value): |
| self.buckets[self.bucketer.bucket_for_value(value)] += 1 |
| self.sum += value |
| self.count += 1 |