Skip to content

Commit

Permalink
Include the worker's name in the http.server.duration metric
Browse files Browse the repository at this point in the history
closes #5844
  • Loading branch information
lubosmj authored and dkliban committed Oct 3, 2024
1 parent dcad800 commit 4009116
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 10 deletions.
1 change: 1 addition & 0 deletions CHANGES/5844.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Included the worker's name in the ``http.server.duration`` OpenTelemetry metric attributes.
6 changes: 6 additions & 0 deletions pulpcore/app/util.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import hashlib
import zlib
import os
import socket
import tempfile
import gnupg

Expand Down Expand Up @@ -658,6 +659,11 @@ def init_domain_metrics_exporter():
DomainMetricsEmitter.build(domain)


@lru_cache(maxsize=1)
def get_worker_name():
    """
    Return the name of the current process in the form ``<pid>@<hostname>``.

    The name is built on the first call and served from a one-entry cache afterwards.

    NOTE(review): because the result is cached, a process forked after the first call would
    keep seeing the parent's pid -- confirm this is only ever called after workers are spawned.
    """
    pid = os.getpid()
    hostname = socket.gethostname()
    return f"{pid}@{hostname}"


class PGAdvisoryLock:
"""
A context manager that will hold a postgres advisory lock non-blocking.
Expand Down
25 changes: 24 additions & 1 deletion pulpcore/app/wsgi.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,37 @@

from django.core.wsgi import get_wsgi_application
from opentelemetry.instrumentation.wsgi import OpenTelemetryMiddleware
from opentelemetry.exporter.otlp.proto.http.metric_exporter import (
OTLPMetricExporter,
)
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader

from pulpcore.app.entrypoint import using_pulp_api_worker
from pulpcore.app.util import get_worker_name

# Refuse to load this WSGI module unless the pulpcore-api entrypoint has flagged itself.
# NOTE(review): `using_pulp_api_worker` looks like a ContextVar read with `False` as the
# fallback value -- confirm in pulpcore.app.entrypoint.
if not using_pulp_api_worker.get(False):
raise RuntimeError("This app must be executed using pulpcore-api entrypoint.")


class WorkerNameMetricsExporter(OTLPMetricExporter):
    """
    An OTLP metric exporter that tags ``http.server.duration`` data points with the worker name.

    Before delegating to the parent exporter, every data point of each metric named
    ``http.server.duration`` receives a ``worker.process`` attribute holding the value
    returned by ``get_worker_name()``.
    """

    def export(self, metrics_data, timeout_millis=10_000, **kwargs):
        """
        Add the worker name to the HTTP duration metric and export the batch.

        Args:
            metrics_data: The collected metrics; its ``resource_metrics`` are walked down to
                the individual metrics and their data points.
            timeout_millis: Forwarded unchanged to the parent exporter.
            **kwargs: Forwarded unchanged to the parent exporter.

        Returns:
            Whatever the parent ``export`` returns.
        """
        for resource_metric in metrics_data.resource_metrics:
            for scope_metric in resource_metric.scope_metrics:
                for metric in scope_metric.metrics:
                    if metric.name != "http.server.duration":
                        continue
                    # A metric may hold several data points (one per attribute set), so all
                    # of them are tagged rather than only the first. Iterating also copes
                    # with an empty sequence, where indexing element 0 would raise IndexError.
                    for data_point in metric.data.data_points:
                        data_point.attributes["worker.process"] = get_worker_name()

        return super().export(metrics_data, timeout_millis, **kwargs)


# Build a meter provider whose periodic reader exports through WorkerNameMetricsExporter,
# so that it can be handed to the WSGI middleware below.
exporter = WorkerNameMetricsExporter()
reader = PeriodicExportingMetricReader(exporter)
provider = MeterProvider(metric_readers=[reader])

application = get_wsgi_application()
# NOTE(review): the next two assignments look like the removed and added sides of a single
# diff line; as written the application would be wrapped by the middleware twice -- confirm
# that only the variant passing `meter_provider=provider` is meant to remain.
application = OpenTelemetryMiddleware(application)
application = OpenTelemetryMiddleware(application, meter_provider=provider)

# Disabling Storage metrics until we find a solution to resource usage.
# https://github.com/pulp/pulpcore/issues/5468
Expand Down
3 changes: 2 additions & 1 deletion pulpcore/content/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

from pulpcore.app.apps import pulp_plugin_configs # noqa: E402: module level not at top of file
from pulpcore.app.models import ContentAppStatus # noqa: E402: module level not at top of file
from pulpcore.app.util import get_worker_name # noqa: E402: module level not at top of file

from .handler import Handler # noqa: E402: module level not at top of file
from .authentication import authenticate # noqa: E402: module level not at top of file
Expand All @@ -38,7 +39,7 @@

async def _heartbeat():
content_app_status = None
name = "{pid}@{hostname}".format(pid=os.getpid(), hostname=socket.gethostname())
name = get_worker_name()
heartbeat_interval = settings.CONTENT_APP_TTL // 4
msg = "Content App '{name}' heartbeat written, sleeping for '{interarrival}' seconds".format(
name=name, interarrival=heartbeat_interval
Expand Down
10 changes: 2 additions & 8 deletions pulpcore/content/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@
from multidict import CIMultiDict
import os
import re
import socket
from gettext import gettext as _
from functools import lru_cache

from aiohttp.client_exceptions import ClientResponseError
from aiohttp.web import FileResponse, StreamResponse, HTTPOk
Expand Down Expand Up @@ -56,6 +54,7 @@
from pulpcore.app.util import ( # noqa: E402: module level not at top of file
MetricsEmitter,
get_domain,
get_worker_name,
cache_key,
)

Expand All @@ -67,11 +66,6 @@
log = logging.getLogger(__name__)


@lru_cache(maxsize=1)
def _get_content_app_name():
    """
    Return the ``<pid>@<hostname>`` name of this process.

    The value is computed once and then served from a one-entry cache.
    """
    name_parts = (str(os.getpid()), socket.gethostname())
    return "@".join(name_parts)


class PathNotResolved(HTTPNotFound):
"""
The path could not be resolved to a published file.
Expand Down Expand Up @@ -1167,6 +1161,6 @@ async def finalize():
# Add `size` to `self.artifacts_size_counter`, labelled with the current domain's name and
# the name of this process.
# NOTE(review): "content_app_name" and "worker_name" appear to be the old and new sides of
# the same diff line -- confirm which attribute key(s) should actually be emitted.
def _report_served_artifact_size(self, size):
attributes = {
"domain_name": get_domain().name,
"content_app_name": _get_content_app_name(),
"worker_name": get_worker_name(),
}
self.artifacts_size_counter.add(size, attributes)

0 comments on commit 4009116

Please sign in to comment.