diff --git a/lib/charms/tempo_k8s/v1/charm_tracing.py b/lib/charms/tempo_k8s/v1/charm_tracing.py index 01735c2c..39ebcd46 100644 --- a/lib/charms/tempo_k8s/v1/charm_tracing.py +++ b/lib/charms/tempo_k8s/v1/charm_tracing.py @@ -126,15 +126,14 @@ def my_tracing_endpoint(self) -> Optional[str]: from opentelemetry.sdk.resources import Resource from opentelemetry.sdk.trace import Span, TracerProvider from opentelemetry.sdk.trace.export import BatchSpanProcessor +from opentelemetry.trace import INVALID_SPAN, Tracer +from opentelemetry.trace import get_current_span as otlp_get_current_span from opentelemetry.trace import ( - INVALID_SPAN, - Tracer, get_tracer, get_tracer_provider, set_span_in_context, set_tracer_provider, ) -from opentelemetry.trace import get_current_span as otlp_get_current_span from ops.charm import CharmBase from ops.framework import Framework @@ -147,9 +146,9 @@ def my_tracing_endpoint(self) -> Optional[str]: # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 4 +LIBPATCH = 5 -PYDEPS = ["opentelemetry-exporter-otlp-proto-http>=1.21.0"] +PYDEPS = ["opentelemetry-exporter-otlp-proto-http==1.21.0"] logger = logging.getLogger("tracing") @@ -241,8 +240,8 @@ def _get_tracing_endpoint(tracing_endpoint_getter, self, charm): if tracing_endpoint is None: logger.debug( - "Charm tracing is disabled. Tracing endpoint is not defined - " - "tracing is not available or relation is not set." + f"{charm}.{tracing_endpoint_getter} returned None; quietly disabling " + f"charm_tracing for the run." ) return elif not isinstance(tracing_endpoint, str): @@ -311,7 +310,18 @@ def wrap_init(self: CharmBase, framework: Framework, *args, **kwargs): } ) provider = TracerProvider(resource=resource) - tracing_endpoint = _get_tracing_endpoint(tracing_endpoint_getter, self, charm) + try: + tracing_endpoint = _get_tracing_endpoint(tracing_endpoint_getter, self, charm) + except Exception: + # if anything goes wrong with retrieving the endpoint, we go on with tracing disabled. + # better than breaking the charm. + logger.exception( + f"exception retrieving the tracing " + f"endpoint from {charm}.{tracing_endpoint_getter}; " + f"proceeding with charm_tracing DISABLED. " + ) + return + if not tracing_endpoint: return diff --git a/lib/charms/tempo_k8s/v2/tracing.py b/lib/charms/tempo_k8s/v2/tracing.py index 617ab21c..d466531e 100644 --- a/lib/charms/tempo_k8s/v2/tracing.py +++ b/lib/charms/tempo_k8s/v2/tracing.py @@ -104,7 +104,7 @@ def __init__(self, *args): # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 1 +LIBPATCH = 2 PYDEPS = ["pydantic"] @@ -817,8 +817,13 @@ def get_endpoint( endpoint = self._get_endpoint(relation or self._relation, protocol=protocol) if not endpoint: requested_protocols = set() - for relation in self.relations: - databag = TracingRequirerAppData.load(relation.data[self._charm.app]) + relations = [relation] if relation else self.relations + for relation in relations: + try: + databag = TracingRequirerAppData.load(relation.data[self._charm.app]) + except DataValidationError: + continue + requested_protocols.update(databag.receivers) if protocol not in requested_protocols: diff --git a/requirements.txt b/requirements.txt index edd28ee9..e5a14576 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ lightkube-models==1.24.1.4 # PYDEPS # lib/charms/tempo_k8s/v1/charm_tracing.py -opentelemetry-exporter-otlp-proto-http +opentelemetry-exporter-otlp-proto-http==1.21.0 # lib/charms/tempo_k8s/v1/tracing.py requires pydantic; we have higher standards: pydantic>=2 diff --git a/src/charm.py b/src/charm.py index 5e94479b..9f420d55 100755 --- a/src/charm.py +++ b/src/charm.py @@ -30,6 +30,7 @@ ) from ops.main import main from ops.model import ActiveStatus, MaintenanceStatus, Relation, WaitingStatus + from tempo import Tempo logger = logging.getLogger(__name__) diff --git a/tests/interface/conftest.py b/tests/interface/conftest.py index 49841b62..4941189c 100644 --- a/tests/interface/conftest.py +++ b/tests/interface/conftest.py @@ -3,12 +3,13 @@ from unittest.mock import patch import pytest -from charm import TempoCharm from charms.tempo_k8s.v1.charm_tracing import charm_tracing_disabled from interface_tester import InterfaceTester from ops.pebble import Layer from scenario.state import Container, State +from charm import TempoCharm + # Interface tests are centrally hosted at https://github.com/canonical/charm-relation-interfaces. # this fixture is used by the test runner of charm-relation-interfaces to test tempo's compliance diff --git a/tests/scenario/conftest.py b/tests/scenario/conftest.py index 726d9e3b..36de650a 100644 --- a/tests/scenario/conftest.py +++ b/tests/scenario/conftest.py @@ -1,9 +1,10 @@ from unittest.mock import patch import pytest -from charm import TempoCharm from scenario import Context +from charm import TempoCharm + @pytest.fixture def tempo_charm(): diff --git a/tests/scenario/test_charm_tracing.py b/tests/scenario/test_charm_tracing.py index bc33a8ab..2bfbed80 100644 --- a/tests/scenario/test_charm_tracing.py +++ b/tests/scenario/test_charm_tracing.py @@ -3,8 +3,16 @@ from unittest.mock import patch import pytest +import scenario from charms.tempo_k8s.v1.charm_tracing import CHARM_TRACING_ENABLED from charms.tempo_k8s.v1.charm_tracing import _autoinstrument as autoinstrument +from charms.tempo_k8s.v2.tracing import ( + ProtocolNotRequestedError, + Receiver, + TracingEndpointRequirer, + TracingProviderAppData, + TracingRequirerAppData, +) from ops import EventBase, EventSource, Framework from ops.charm import CharmBase, CharmEvents from scenario import Context, State @@ -147,7 +155,7 @@ def test_base_tracer_endpoint_disabled(caplog): ctx = Context(MyCharmSimpleDisabled, meta=MyCharmSimpleDisabled.META) ctx.run("start", State()) - assert "Charm tracing is disabled." in caplog.text + assert "quietly disabling charm_tracing for the run." in caplog.text assert not f.called @@ -338,3 +346,114 @@ def test_base_tracer_endpoint_custom_event(caplog): assert span.parent assert span.parent.trace_id assert len({(span.parent.trace_id if span.parent else 0) for span in spans}) == 2 + + +class MyRemoteCharm(CharmBase): + META = {"name": "charlie", "requires": {"tracing": {"interface": "tracing", "limit": 1}}} + _request = True + + def __init__(self, framework: Framework): + super().__init__(framework) + self.tracing = TracingEndpointRequirer( + self, "tracing", protocols=(["otlp_http"] if self._request else []) + ) + + def tempo(self): + return self.tracing.get_endpoint("otlp_http") + + +autoinstrument(MyRemoteCharm, MyRemoteCharm.tempo) + + +@pytest.mark.parametrize("leader", (True, False)) +def test_tracing_requirer_remote_charm_request_response(leader): + # IF the leader unit (whoever it is) did request the endpoint to be activated + MyRemoteCharm._request = True + ctx = Context(MyRemoteCharm, meta=MyRemoteCharm.META) + # WHEN you get any event AND the remote unit has already replied + tracing = scenario.Relation( + "tracing", + # if we're not leader, assume the leader did its part already + local_app_data=TracingRequirerAppData(receivers=["otlp_http"]).dump() + if not leader + else {}, + remote_app_data=TracingProviderAppData( + host="foo.com", receivers=[Receiver(port=80, protocol="otlp_http")] + ).dump(), + ) + with ctx.manager("start", State(leader=leader, relations=[tracing])) as mgr: + # THEN you're good + assert mgr.charm.tempo() == "http://foo.com:80" + + +@pytest.mark.parametrize("leader", (True, False)) +def test_tracing_requirer_remote_charm_no_request_but_response(leader): + # IF the leader did NOT request the endpoint to be activated + MyRemoteCharm._request = False + ctx = Context(MyRemoteCharm, meta=MyRemoteCharm.META) + # WHEN you get any event AND the remote unit has already replied + tracing = scenario.Relation( + "tracing", + # empty local app data + remote_app_data=TracingProviderAppData( + host="foo.com", + # but the remote end has sent the data you need + receivers=[Receiver(port=80, protocol="otlp_http")], + ).dump(), + ) + with ctx.manager("start", State(leader=leader, relations=[tracing])) as mgr: + # THEN you're lucky, but you're good + assert mgr.charm.tempo() == "http://foo.com:80" + + +@pytest.mark.parametrize("relation", (True, False)) +@pytest.mark.parametrize("leader", (True, False)) +def test_tracing_requirer_remote_charm_no_request_no_response(leader, relation): + """Verify that the charm successfully executes (with charm_tracing disabled) if the tempo() call raises.""" + # IF the leader did NOT request the endpoint to be activated + MyRemoteCharm._request = False + ctx = Context(MyRemoteCharm, meta=MyRemoteCharm.META) + # WHEN you get any event + if relation: + # AND you have an empty relation + tracing = scenario.Relation( + "tracing", + # empty local and remote app data + ) + relations = [tracing] + else: + # OR no relation at all + relations = [] + + # THEN you're not totally good: self.tempo() will raise, but charm exec will still exit 0 + with ctx.manager("start", State(leader=leader, relations=relations)) as mgr: + with pytest.raises(ProtocolNotRequestedError): + assert mgr.charm.tempo() is None + + +class MyRemoteBorkyCharm(CharmBase): + META = {"name": "charlie", "requires": {"tracing": {"interface": "tracing", "limit": 1}}} + _borky_return_value = None + + def tempo(self): + return self._borky_return_value + + +autoinstrument(MyRemoteBorkyCharm, MyRemoteBorkyCharm.tempo) + + +@pytest.mark.parametrize("borky_return_value", (True, 42, object(), 0.2, [], (), {})) +def test_borky_tempo_return_value(borky_return_value, caplog): + """Verify that the charm exits 0 (with charm_tracing disabled) if the tempo() call returns bad values.""" + # IF the charm's tempo endpoint getter returns anything but None or str + MyRemoteBorkyCharm._borky_return_value = borky_return_value + ctx = Context(MyRemoteBorkyCharm, meta=MyRemoteBorkyCharm.META) + # WHEN you get any event + # THEN you're not totally good: self.tempo() will raise, but charm exec will still exit 0 + + ctx.run("start", State()) + # traceback from the TypeError raised by _get_tracing_endpoint + assert "should return a tempo endpoint" in caplog.text + # logger.exception in _setup_root_span_initializer + assert "exception retrieving the tracing endpoint from" in caplog.text + assert "proceeding with charm_tracing DISABLED." in caplog.text diff --git a/tests/scenario/test_tracing_legacy.py b/tests/scenario/test_tracing_legacy.py index 8d387927..08fb35f3 100644 --- a/tests/scenario/test_tracing_legacy.py +++ b/tests/scenario/test_tracing_legacy.py @@ -2,12 +2,17 @@ import socket import pytest -from charm import TempoCharm from charms.tempo_k8s.v1.charm_tracing import charm_tracing_disabled -from charms.tempo_k8s.v1.tracing import TracingProviderAppData as TracingProviderAppDataV1 -from charms.tempo_k8s.v2.tracing import TracingProviderAppData as TracingProviderAppDataV2 +from charms.tempo_k8s.v1.tracing import ( + TracingProviderAppData as TracingProviderAppDataV1, +) +from charms.tempo_k8s.v2.tracing import ( + TracingProviderAppData as TracingProviderAppDataV2, +) from scenario import Container, Relation, State +from charm import TempoCharm + NO_RECEIVERS = 13 """Number of supported receivers (incl. deprecated legacy ones).""" NO_RECEIVERS_LEGACY = 6 diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py index d8ec5639..100074aa 100644 --- a/tests/unit/test_charm.py +++ b/tests/unit/test_charm.py @@ -1,4 +1,5 @@ import pytest + from tempo import Tempo