From b43e4f18f7bc67a3e939c2712093f9767b038a9e Mon Sep 17 00:00:00 2001 From: Emmanuel T Odeke Date: Thu, 25 Jul 2024 02:39:43 +0300 Subject: [PATCH] fix: use modernized and standardized OpenTelemetry when tracing This change modernizes trace span attributes by using OpenTelemetry's semantic conventions that are standardized and allow for much better common ground adoption by broader systems, even more as Google Cloud Tracing & Monitoring pushes towards OpenTelemetry more. With this change we've made the replacement of these fields, directly with imports from `opentelemetry.semconv.trace.SpanAttributes`, as: * "db.type" => DB_SYSTEM aka "db.system" * "db.url" => DB_CONNECTION_STRING aka "db.connection_string" * "db.instance" => DB_NAME aka "db.name" * "net.host.name" => NET_HOST_NAME aka "net.host.name" While here, also updated opentelemetry-(api, sdk) dependencies to use versions "1.25.0", then opentelemetry-(instrumentation) to "0.46b0" Fixes #1170 Fixes #1173 --- docs/opentelemetry-tracing.rst | 5 +- examples/trace.py | 61 +++++++++++++++++++ .../spanner_v1/_opentelemetry_tracing.py | 14 +++-- google/cloud/spanner_v1/snapshot.py | 11 +++- setup.py | 7 ++- testing/constraints-3.7.txt | 7 ++- tests/_helpers.py | 12 ++++ tests/system/test_session_api.py | 8 +-- tests/unit/test__opentelemetry_tracing.py | 48 +++++++++------ tests/unit/test_batch.py | 17 ++++-- tests/unit/test_session.py | 13 ++-- tests/unit/test_snapshot.py | 20 +++--- tests/unit/test_spanner.py | 16 +++-- tests/unit/test_transaction.py | 17 ++++-- 14 files changed, 190 insertions(+), 66 deletions(-) create mode 100644 examples/trace.py diff --git a/docs/opentelemetry-tracing.rst b/docs/opentelemetry-tracing.rst index 9b3dea276f..8e3b006170 100644 --- a/docs/opentelemetry-tracing.rst +++ b/docs/opentelemetry-tracing.rst @@ -9,6 +9,7 @@ To take advantage of these traces, we first need to install OpenTelemetry: .. code-block:: sh pip install opentelemetry-api opentelemetry-sdk opentelemetry-instrumentation + pip install opentelemetry-exporter-google-cloud # [Optional] Installs the cloud monitoring exporter, however you can use any exporter of your choice pip install opentelemetry-exporter-google-cloud @@ -19,14 +20,14 @@ We also need to tell OpenTelemetry which exporter to use. To export Spanner trac from opentelemetry import trace from opentelemetry.sdk.trace import TracerProvider - from opentelemetry.trace.sampling import ProbabilitySampler + from opentelemetry.sdk.trace.sampling import TraceIdRatioBased from opentelemetry.exporter.cloud_trace import CloudTraceSpanExporter # BatchExportSpanProcessor exports spans to Cloud Trace # in a seperate thread to not block on the main thread from opentelemetry.sdk.trace.export import BatchExportSpanProcessor # Create and export one trace every 1000 requests - sampler = ProbabilitySampler(1/1000) + sampler = TraceIdRatioBased(1/1000) # Use the default tracer provider trace.set_tracer_provider(TracerProvider(sampler=sampler)) trace.get_tracer_provider().add_span_processor( diff --git a/examples/trace.py b/examples/trace.py new file mode 100644 index 0000000000..321156849d --- /dev/null +++ b/examples/trace.py @@ -0,0 +1,61 @@ +# -*- coding: utf-8 -*- +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License + +import os +import time + +import google.cloud.spanner as spanner +from opentelemetry.exporter.cloud_trace import CloudTraceSpanExporter +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import BatchExportSpanProcessor +from opentelemetry.sdk.trace.sampling import ALWAYS_ON +from opentelemetry import trace + + +def main(): + # Setup OpenTelemetry, trace and Cloud Trace exporter. + sampler = ALWAYS_ON + tracerProvider = TracerProvider(sampler=sampler) + tracerProvider.add_span_processor( + BatchExportSpanProcessor(CloudTraceSpanExporter())) + trace.set_tracer_provider(tracerProvider) + tracer = trace.get_tracer(__name__) + + # Setup the Cloud Spanner Client. + project_id = os.environ.get('SPANNER_PROJECT_ID') + spanner_client = spanner.Client(project_id) + instance = spanner_client.instance('test-instance') + database = instance.database('test-db') + + # Now run our queries + with tracer.start_as_current_span('QueryDatabase'): + with database.snapshot() as snapshot: + with tracer.start_as_current_span('InformationSchema'): + info_schema = snapshot.execute_sql( + 'SELECT * FROM INFORMATION_SCHEMA.TABLES') + for row in info_schema: + print(row) + + with tracer.start_as_current_span('ServerTimeQuery'): + with database.snapshot() as snapshot: + # Purposefully issue a bad SQL statement to examine exceptions + # that get recorded and a ERROR span status. + data = snapshot.execute_sql('SELECT CURRENT_TIMESTAMPx()') + for row in data: + print(row) + + +if __name__ == '__main__': + main() diff --git a/google/cloud/spanner_v1/_opentelemetry_tracing.py b/google/cloud/spanner_v1/_opentelemetry_tracing.py index 8f9f8559ef..f6b2be7cc6 100644 --- a/google/cloud/spanner_v1/_opentelemetry_tracing.py +++ b/google/cloud/spanner_v1/_opentelemetry_tracing.py @@ -22,8 +22,14 @@ try: from opentelemetry import trace from opentelemetry.trace.status import Status, StatusCode + from opentelemetry.semconv.trace import SpanAttributes HAS_OPENTELEMETRY_INSTALLED = True + DB_SYSTEM = SpanAttributes.DB_SYSTEM + DB_NAME = SpanAttributes.DB_NAME + DB_CONNECTION_STRING = SpanAttributes.DB_CONNECTION_STRING + NET_HOST_NAME = SpanAttributes.NET_HOST_NAME + DB_STATEMENT = spanAttributes.DB_STATEMENT except ImportError: HAS_OPENTELEMETRY_INSTALLED = False @@ -39,10 +45,10 @@ def trace_call(name, session, extra_attributes=None): # Set base attributes that we know for every trace created attributes = { - "db.type": "spanner", - "db.url": SpannerClient.DEFAULT_ENDPOINT, - "db.instance": session._database.name, - "net.host.name": SpannerClient.DEFAULT_ENDPOINT, + DB_SYSTEM: "google.cloud.spanner", + DB_CONNECTION_STRING: SpannerClient.DEFAULT_ENDPOINT, + DB_NAME: session._database.name, + NET_HOST_NAME: SpannerClient.DEFAULT_ENDPOINT, } if extra_attributes: diff --git a/google/cloud/spanner_v1/snapshot.py b/google/cloud/spanner_v1/snapshot.py index 3bc1a746bd..05e680b2e2 100644 --- a/google/cloud/spanner_v1/snapshot.py +++ b/google/cloud/spanner_v1/snapshot.py @@ -38,7 +38,10 @@ _check_rst_stream_error, _SessionWrapper, ) -from google.cloud.spanner_v1._opentelemetry_tracing import trace_call +from google.cloud.spanner_v1._opentelemetry_tracing import ( + trace_call, + DB_STATEMENT, +) from google.cloud.spanner_v1.streamed import StreamedResultSet from google.cloud.spanner_v1 import RequestOptions @@ -488,7 +491,9 @@ def execute_sql( timeout=timeout, ) - trace_attributes = {"db.statement": sql} + # TODO(@odeke-em): only annotate this span's SQL + # only if we have EXTENDED_TRACING=true enabled. + trace_attributes = {DB_STATEMENT: sql} if self._transaction_id is None: # lock is added to handle the inline begin for first rpc @@ -696,7 +701,7 @@ def partition_query( partition_options=partition_options, ) - trace_attributes = {"db.statement": sql} + trace_attributes = {DB_STATEMENT: sql} with trace_call( "CloudSpanner.PartitionReadWriteTransaction", self._session, diff --git a/setup.py b/setup.py index 98b1a61748..48ee347ea7 100644 --- a/setup.py +++ b/setup.py @@ -47,9 +47,10 @@ ] extras = { "tracing": [ - "opentelemetry-api >= 1.1.0", - "opentelemetry-sdk >= 1.1.0", - "opentelemetry-instrumentation >= 0.20b0, < 0.23dev", + "opentelemetry-api >= 1.25.0", + "opentelemetry-sdk >= 1.25.0", + "opentelemetry-instrumentation >= 0.46b0", + "opentelemetry-semantic-conventions >= 0.46b0", ], "libcst": "libcst >= 0.2.5", } diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index 20170203f5..c59c2f9ef1 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -10,9 +10,10 @@ grpc-google-iam-v1==0.12.4 libcst==0.2.5 proto-plus==1.22.0 sqlparse==0.4.4 -opentelemetry-api==1.1.0 -opentelemetry-sdk==1.1.0 -opentelemetry-instrumentation==0.20b0 +opentelemetry-api==1.25.0 +opentelemetry-sdk==1.25.0 +opentelemetry-instrumentation==0.46b0 +opentelemetry-semantic-conventions==0.46b0 protobuf==3.20.2 deprecated==1.2.14 grpc-interceptor==0.15.4 diff --git a/tests/_helpers.py b/tests/_helpers.py index 42178fd439..915dcf6c44 100644 --- a/tests/_helpers.py +++ b/tests/_helpers.py @@ -10,13 +10,25 @@ ) from opentelemetry.trace.status import StatusCode + from opentelemetry.semconv.trace import SpanAttributes + trace.set_tracer_provider(TracerProvider()) HAS_OPENTELEMETRY_INSTALLED = True + + DB_SYSTEM = SpanAttributes.DB_SYSTEM + DB_NAME = SpanAttributes.DB_NAME + DB_CONNECTION_STRING = SpanAttributes.DB_CONNECTION_STRING + NET_HOST_NAME = SpanAttributes.NET_HOST_NAME + except ImportError: HAS_OPENTELEMETRY_INSTALLED = False StatusCode = mock.Mock() + DB_SYSTEM = "db.system" + DB_NAME = "db.name" + DB_CONNECTION_STRING = "db.connection_string" + NET_HOST_NAME = "net.host.name" _TEST_OT_EXPORTER = None _TEST_OT_PROVIDER_INITIALIZED = False diff --git a/tests/system/test_session_api.py b/tests/system/test_session_api.py index bbe6000aba..753ed12680 100644 --- a/tests/system/test_session_api.py +++ b/tests/system/test_session_api.py @@ -341,10 +341,10 @@ def assert_span_attributes( def _make_attributes(db_instance, **kwargs): attributes = { - "db.type": "spanner", - "db.url": "spanner.googleapis.com", - "net.host.name": "spanner.googleapis.com", - "db.instance": db_instance, + ot_helpers.DB_SYSTEM: "google.cloud.spanner", + ot_helpers.DB_CONNECTION_STRING: "spanner.googleapis.com", + ot_helpers.DB_NAME: db_instance, + ot_helpers.NET_HOST_NAME: "spanner.googleapis.com", } attributes.update(kwargs) diff --git a/tests/unit/test__opentelemetry_tracing.py b/tests/unit/test__opentelemetry_tracing.py index 25870227bf..eb8f875f64 100644 --- a/tests/unit/test__opentelemetry_tracing.py +++ b/tests/unit/test__opentelemetry_tracing.py @@ -12,8 +12,15 @@ from google.api_core.exceptions import GoogleAPICallError from google.cloud.spanner_v1 import _opentelemetry_tracing -from tests._helpers import OpenTelemetryBase, HAS_OPENTELEMETRY_INSTALLED - +from tests._helpers import ( + OpenTelemetryBase, + StatusCode, + DB_SYSTEM, + DB_NAME, + DB_CONNECTION_STRING, + HAS_OPENTELEMETRY_INSTALLED, + NET_HOST_NAME +) def _make_rpc_error(error_cls, trailing_metadata=None): import grpc @@ -51,14 +58,14 @@ class TestTracing(OpenTelemetryBase): def test_trace_call(self): extra_attributes = { "attribute1": "value1", - # Since our database is mocked, we have to override the db.instance parameter so it is a string - "db.instance": "database_name", + # Since our database is mocked, we have to override the DB_NAME parameter so it is a string + DB_NAME: "database_name", } expected_attributes = { - "db.type": "spanner", - "db.url": "spanner.googleapis.com", - "net.host.name": "spanner.googleapis.com", + DB_SYSTEM: "google.cloud.spanner", + DB_CONNECTION_STRING: "spanner.googleapis.com", + NET_HOST_NAME: "spanner.googleapis.com", } expected_attributes.update(extra_attributes) @@ -78,13 +85,14 @@ def test_trace_call(self): self.assertEqual(span.status.status_code, StatusCode.OK) def test_trace_error(self): - extra_attributes = {"db.instance": "database_name"} + extra_attributes = {DB_NAME: "database_name"} expected_attributes = { - "db.type": "spanner", - "db.url": "spanner.googleapis.com", - "net.host.name": "spanner.googleapis.com", + DB_SYSTEM: "google.cloud.spanner", + DB_CONNECTION_STRING: "spanner.googleapis.com", + NET_HOST_NAME: "spanner.googleapis.com", } + expected_attributes.update(extra_attributes) with self.assertRaises(GoogleAPICallError): @@ -104,13 +112,14 @@ def test_trace_error(self): self.assertEqual(span.status.status_code, StatusCode.ERROR) def test_trace_grpc_error(self): - extra_attributes = {"db.instance": "database_name"} + extra_attributes = {DB_NAME: "database_name"} expected_attributes = { - "db.type": "spanner", - "db.url": "spanner.googleapis.com:443", - "net.host.name": "spanner.googleapis.com:443", + DB_SYSTEM: "google.cloud.spanner", + DB_CONNECTION_STRING: "spanner.googleapis.com", + NET_HOST_NAME: "spanner.googleapis.com", } + expected_attributes.update(extra_attributes) with self.assertRaises(GoogleAPICallError): @@ -127,13 +136,14 @@ def test_trace_grpc_error(self): self.assertEqual(span.status.status_code, StatusCode.ERROR) def test_trace_codeless_error(self): - extra_attributes = {"db.instance": "database_name"} + extra_attributes = {DB_NAME: "database_name"} expected_attributes = { - "db.type": "spanner", - "db.url": "spanner.googleapis.com:443", - "net.host.name": "spanner.googleapis.com:443", + DB_SYSTEM: "google.cloud.spanner", + DB_CONNECTION_STRING: "spanner.googleapis.com", + NET_HOST_NAME: "spanner.googleapis.com", } + expected_attributes.update(extra_attributes) with self.assertRaises(GoogleAPICallError): diff --git a/tests/unit/test_batch.py b/tests/unit/test_batch.py index ee96decf5e..85f0bb2aed 100644 --- a/tests/unit/test_batch.py +++ b/tests/unit/test_batch.py @@ -14,7 +14,14 @@ import unittest -from tests._helpers import OpenTelemetryBase, StatusCode +from tests._helpers import ( + OpenTelemetryBase, + StatusCode, + DB_SYSTEM, + DB_NAME, + DB_CONNECTION_STRING, + NET_HOST_NAME, +) from google.cloud.spanner_v1 import RequestOptions TABLE_NAME = "citizens" @@ -24,10 +31,10 @@ ["bharney@example.com", "Bharney", "Rhubble", 31], ] BASE_ATTRIBUTES = { - "db.type": "spanner", - "db.url": "spanner.googleapis.com", - "db.instance": "testing", - "net.host.name": "spanner.googleapis.com", + DB_SYSTEM: "google.cloud.spanner", + DB_CONNECTION_STRING: "spanner.googleapis.com", + DB_NAME: "testing", + NET_HOST_NAME: "spanner.googleapis.com", } diff --git a/tests/unit/test_session.py b/tests/unit/test_session.py index d4052f0ae3..ee1f727868 100644 --- a/tests/unit/test_session.py +++ b/tests/unit/test_session.py @@ -20,6 +20,10 @@ OpenTelemetryBase, StatusCode, HAS_OPENTELEMETRY_INSTALLED, + DB_SYSTEM, + DB_NAME, + DB_CONNECTION_STRING, + NET_HOST_NAME, ) @@ -46,12 +50,11 @@ class TestSession(OpenTelemetryBase): SESSION_NAME = DATABASE_NAME + "/sessions/" + SESSION_ID DATABASE_ROLE = "dummy-role" BASE_ATTRIBUTES = { - "db.type": "spanner", - "db.url": "spanner.googleapis.com", - "db.instance": DATABASE_NAME, - "net.host.name": "spanner.googleapis.com", + DB_SYSTEM: "google.cloud.spanner", + DB_CONNECTION_STRING: "spanner.googleapis.com", + DB_NAME: DATABASE_NAME, + NET_HOST_NAME: "spanner.googleapis.com", } - def _getTargetClass(self): from google.cloud.spanner_v1.session import Session diff --git a/tests/unit/test_snapshot.py b/tests/unit/test_snapshot.py index bf5563dcfd..f636d15a11 100644 --- a/tests/unit/test_snapshot.py +++ b/tests/unit/test_snapshot.py @@ -21,6 +21,10 @@ OpenTelemetryBase, StatusCode, HAS_OPENTELEMETRY_INSTALLED, + DB_SYSTEM, + DB_NAME, + DB_CONNECTION_STRING, + NET_HOST_NAME, ) from google.cloud.spanner_v1.param_types import INT64 from google.api_core.retry import Retry @@ -41,10 +45,10 @@ SECONDS = 3 MICROS = 123456 BASE_ATTRIBUTES = { - "db.type": "spanner", - "db.url": "spanner.googleapis.com", - "db.instance": "testing", - "net.host.name": "spanner.googleapis.com", + DB_SYSTEM: "google.cloud.spanner", + DB_CONNECTION_STRING: "spanner.googleapis.com", + DB_NAME: "testing", + NET_HOST_NAME: "spanner.googleapis.com", } DIRECTED_READ_OPTIONS = { "include_replicas": { @@ -531,10 +535,10 @@ def test_iteration_w_multiple_span_creation(self): self.assertEqual( dict(span.attributes), { - "db.type": "spanner", - "db.url": "spanner.googleapis.com", - "db.instance": "testing", - "net.host.name": "spanner.googleapis.com", + DB_SYSTEM: "google.cloud.spanner", + DB_CONNECTION_STRING: "spanner.googleapis.com", + DB_NAME: "testing", + NET_HOST_NAME: "spanner.googleapis.com", }, ) diff --git a/tests/unit/test_spanner.py b/tests/unit/test_spanner.py index ab5479eb3c..a8d0969802 100644 --- a/tests/unit/test_spanner.py +++ b/tests/unit/test_spanner.py @@ -45,7 +45,13 @@ from google.api_core import gapic_v1 -from tests._helpers import OpenTelemetryBase +from tests._helpers import ( + OpenTelemetryBase, + DB_SYSTEM, + DB_NAME, + DB_CONNECTION_STRING, + NET_HOST_NAME, +) TABLE_NAME = "citizens" COLUMNS = ["email", "first_name", "last_name", "age"] @@ -110,10 +116,10 @@ class TestTransaction(OpenTelemetryBase): TRANSACTION_TAG = "transaction-tag" BASE_ATTRIBUTES = { - "db.type": "spanner", - "db.url": "spanner.googleapis.com", - "db.instance": "testing", - "net.host.name": "spanner.googleapis.com", + DB_SYSTEM: "google.cloud.spanner", + DB_CONNECTION_STRING: "spanner.googleapis.com", + DB_NAME: "testing", + NET_HOST_NAME: "spanner.googleapis.com", } def _getTargetClass(self): diff --git a/tests/unit/test_transaction.py b/tests/unit/test_transaction.py index b40ae8843f..589822a9f6 100644 --- a/tests/unit/test_transaction.py +++ b/tests/unit/test_transaction.py @@ -21,7 +21,14 @@ from google.api_core.retry import Retry from google.api_core import gapic_v1 -from tests._helpers import OpenTelemetryBase, StatusCode +from tests._helpers import ( + OpenTelemetryBase, + StatusCode, + DB_SYSTEM, + DB_NAME, + DB_CONNECTION_STRING, + NET_HOST_NAME, +) TABLE_NAME = "citizens" COLUMNS = ["email", "first_name", "last_name", "age"] @@ -53,10 +60,10 @@ class TestTransaction(OpenTelemetryBase): TRANSACTION_TAG = "transaction-tag" BASE_ATTRIBUTES = { - "db.type": "spanner", - "db.url": "spanner.googleapis.com", - "db.instance": "testing", - "net.host.name": "spanner.googleapis.com", + DB_SYSTEM: "google.cloud.spanner", + DB_CONNECTION_STRING: "spanner.googleapis.com", + DB_NAME: "testing", + NET_HOST_NAME: "spanner.googleapis.com", } def _getTargetClass(self):