Skip to content

Commit

Permalink
Use better histogram buckets (#8263)
Browse files Browse the repository at this point in the history
Use a method of generating buckets based on specifying how many
buckets to use per order of magnitude.

For our bytes field, we use:
(32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144, 524288, 1048576)

And for querys per connection, we use:
(1.0, 5.0, 10.0, 50.0, 100.0, 500.0, 1000.0, 5000.0, 10000.0)

Fixes #8245.
  • Loading branch information
msullivan authored Jan 29, 2025
1 parent 0fe5693 commit b3a3ed7
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 5 deletions.
33 changes: 28 additions & 5 deletions edb/common/prometheus.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,36 @@ def calc_buckets(
"""Calculate histogram buckets on a logarithmic scale."""
# See https://amplitude.com/blog/2014/08/06/optimal-streaming-histograms
# for more details.
# (Says a long standing comment, but this isn't what that post recommends!)
result: list[float] = []
while start <= upper_bound:
result.append(start)
start *= increment_ratio
return tuple(result)


def per_order_buckets(
start: float, end: float,
*,
base: float=10.0,
entries_per_order=4,
) -> tuple[float, ...]:
# See https://amplitude.com/blog/2014/08/06/optimal-streaming-histograms
# for more details.
# (Actually, for this one.)
result: list[float] = [start]

next = start * base
while next <= end:
for i in range(1, entries_per_order):
val = next / entries_per_order * i
if val > result[-1]:
result.append(val)
result.append(next)
next *= base
return tuple(result)


class Unit(enum.Enum):

# https://prometheus.io/docs/practices/naming/#base-units
Expand Down Expand Up @@ -174,7 +197,7 @@ def new_histogram(
/,
*,
unit: Unit | None = None,
buckets: list[float] | None = None,
buckets: typing.Sequence[float] | None = None,
) -> Histogram:
hist = Histogram(self, name, desc, unit, buckets=buckets)
self._add_metric(hist)
Expand All @@ -187,7 +210,7 @@ def new_labeled_histogram(
/,
*,
unit: Unit | None = None,
buckets: list[float] | None = None,
buckets: typing.Sequence[float] | None = None,
labels: tuple[str, ...],
) -> LabeledHistogram:
hist = LabeledHistogram(
Expand Down Expand Up @@ -505,7 +528,7 @@ class BaseHistogram(BaseMetric):
]

def __init__(
self, *args: typing.Any, buckets: list[float] | None = None
self, *args: typing.Any, buckets: typing.Sequence[float] | None = None
) -> None:
if buckets is None:
buckets = self.DEFAULT_BUCKETS
Expand All @@ -530,7 +553,7 @@ class Histogram(BaseHistogram):
_sum: float

def __init__(
self, *args: typing.Any, buckets: list[float] | None = None
self, *args: typing.Any, buckets: typing.Sequence[float] | None = None
) -> None:
super().__init__(*args, buckets=buckets)
self._sum = 0.0
Expand Down Expand Up @@ -581,7 +604,7 @@ class LabeledHistogram(BaseHistogram):
def __init__(
self,
*args: typing.Any,
buckets: list[float] | None = None,
buckets: typing.Sequence[float] | None = None,
labels: tuple[str, ...],
) -> None:
super().__init__(*args, buckets=buckets)
Expand Down
9 changes: 9 additions & 0 deletions edb/server/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,13 @@

registry = prom.Registry(prefix='edgedb_server')

COUNT_BUCKETS = prom.per_order_buckets(
1, 10000, entries_per_order=2,
)
BYTES_BUCKETS = prom.per_order_buckets(
32, 2**20, entries_per_order=1, base=2,
)

compiler_process_spawns = registry.new_counter(
'compiler_process_spawns_total',
'Total number of compiler processes spawned.'
Expand Down Expand Up @@ -144,13 +151,15 @@
queries_per_connection = registry.new_labeled_histogram(
'queries_per_connection',
'Number of queries per connection.',
buckets=COUNT_BUCKETS,
labels=('tenant', 'interface'),
)

query_size = registry.new_labeled_histogram(
'query_size',
'The size of a query.',
unit=prom.Unit.BYTES,
buckets=BYTES_BUCKETS,
labels=('tenant', 'interface'),
)

Expand Down

0 comments on commit b3a3ed7

Please sign in to comment.