From c40a356a93361131f52e89c15fbeaaf80126eeda Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Mon, 15 Apr 2024 20:42:30 -0700 Subject: [PATCH 01/15] Draft runtime metrics --- conf/pulsar_env.sh | 4 ++++ pulsar-broker/pom.xml | 1 + .../stats/PulsarBrokerOpenTelemetry.java | 23 ++++++++++++++++++- pulsar-opentelemetry/pom.xml | 16 +++++++++++++ .../OpenTelemetryServiceTest.java | 12 ++++++++++ 5 files changed, 55 insertions(+), 1 deletion(-) diff --git a/conf/pulsar_env.sh b/conf/pulsar_env.sh index c7bba23c234d9..5a2a142f3d868 100755 --- a/conf/pulsar_env.sh +++ b/conf/pulsar_env.sh @@ -94,3 +94,7 @@ PULSAR_EXTRA_OPTS="${PULSAR_EXTRA_OPTS:-" -Dpulsar.allocator.exit_on_oom=true -D #Wait time before forcefully kill the pulsar server instance, if the stop is not successful #PULSAR_STOP_TIMEOUT= +# Enable semantically stably telemetry for JVM metrics +OTEL_INSTRUMENTATION_RUNTIME_TELEMETRY_JAVA17_ENABLE_ALL=true +OTEL_INSTRUMENTATION_RUNTIME_TELEMETRY_EMIT_EXPERIMENTAL_TELEMETRY=true +OTEL_SEMCONV_STABILITY_OPT_IN=true \ No newline at end of file diff --git a/pulsar-broker/pom.xml b/pulsar-broker/pom.xml index e15e024ea8158..a5b6d2e6b28d4 100644 --- a/pulsar-broker/pom.xml +++ b/pulsar-broker/pom.xml @@ -616,6 +616,7 @@ org.apache.pulsar.metadata.bookkeeper.PulsarMetadataBookieDriver org.apache.pulsar.metadata.bookkeeper.PulsarMetadataClientDriver + jvm diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/PulsarBrokerOpenTelemetry.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/PulsarBrokerOpenTelemetry.java index 01ca65d2cc537..8052bc436208e 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/PulsarBrokerOpenTelemetry.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/PulsarBrokerOpenTelemetry.java @@ -20,6 +20,7 @@ import com.google.common.annotations.VisibleForTesting; import io.opentelemetry.api.metrics.Meter; +import io.opentelemetry.instrumentation.runtimemetrics.java17.RuntimeMetrics; import io.opentelemetry.sdk.autoconfigure.AutoConfiguredOpenTelemetrySdkBuilder; import java.io.Closeable; import java.util.function.Consumer; @@ -36,6 +37,8 @@ public class PulsarBrokerOpenTelemetry implements Closeable { @Getter private final Meter meter; + private final RuntimeMetrics runtimeMetrics; + public PulsarBrokerOpenTelemetry(ServiceConfiguration config, @VisibleForTesting Consumer builderCustomizer) { openTelemetryService = OpenTelemetryService.builder() @@ -44,11 +47,29 @@ public PulsarBrokerOpenTelemetry(ServiceConfiguration config, .serviceVersion(PulsarVersion.getVersion()) .builderCustomizer(builderCustomizer) .build(); - meter = openTelemetryService.getOpenTelemetry().getMeter("org.apache.pulsar.broker"); + var openTelemetry = openTelemetryService.getOpenTelemetry(); + + meter = openTelemetry.getMeter("org.apache.pulsar.broker"); + + // For a list of exposed metrics, see https://opentelemetry.io/docs/specs/semconv/runtime/jvm-metrics/ + runtimeMetrics = RuntimeMetrics.builder(openTelemetry) + .enableAllFeatures() + .enableExperimentalJmxTelemetry() + .build(); + + /* + BufferPools.registerObservers(openTelemetry); + Classes.registerObservers(openTelemetry); + Cpu.registerObservers(openTelemetry); + GarbageCollector.registerObservers(openTelemetry); + MemoryPools.registerObservers(openTelemetry); + Threads.registerObservers(openTelemetry); + */ } @Override public void close() { + runtimeMetrics.close(); openTelemetryService.close(); } } diff --git a/pulsar-opentelemetry/pom.xml b/pulsar-opentelemetry/pom.xml index 82a9658cc9d31..f0a835e047f60 100644 --- a/pulsar-opentelemetry/pom.xml +++ b/pulsar-opentelemetry/pom.xml @@ -58,6 +58,10 @@ io.opentelemetry.semconv opentelemetry-semconv + + io.opentelemetry.instrumentation + opentelemetry-runtime-telemetry-java17 + com.google.guava @@ -130,6 +134,18 @@ + + + org.apache.maven.plugins + maven-surefire-plugin + + + true + true + jvm + + + diff --git a/pulsar-opentelemetry/src/test/java/org/apache/pulsar/opentelemetry/OpenTelemetryServiceTest.java b/pulsar-opentelemetry/src/test/java/org/apache/pulsar/opentelemetry/OpenTelemetryServiceTest.java index bf404496a2eca..561b38a5b403c 100644 --- a/pulsar-opentelemetry/src/test/java/org/apache/pulsar/opentelemetry/OpenTelemetryServiceTest.java +++ b/pulsar-opentelemetry/src/test/java/org/apache/pulsar/opentelemetry/OpenTelemetryServiceTest.java @@ -25,6 +25,7 @@ import io.opentelemetry.api.metrics.LongCounterBuilder; import io.opentelemetry.api.metrics.Meter; import io.opentelemetry.instrumentation.resources.JarServiceNameDetector; +import io.opentelemetry.instrumentation.runtimemetrics.java17.RuntimeMetrics; import io.opentelemetry.sdk.autoconfigure.AutoConfiguredOpenTelemetrySdkBuilder; import io.opentelemetry.sdk.common.InstrumentationScopeInfo; import io.opentelemetry.sdk.metrics.export.MetricReader; @@ -198,4 +199,15 @@ public void testServiceIsDisabledByDefault() throws Exception { // Validate that the callback has not being called. assertThat(callback).isFalse(); } + + @Test + public void testJvmRuntimeMetrics() { + var otel = openTelemetryService.getOpenTelemetry(); + + @Cleanup + var runtimeMetrics = RuntimeMetrics.builder(otel).enableAllFeatures().enableExperimentalJmxTelemetry().build(); + + var metrics = reader.collectAllMetrics(); + assertThat(metrics).anySatisfy(metric -> assertThat(metric).hasName("jvm.thread.count")); + } } From 92e0412bef65c7079ef16eeca0aee29d1a5227a1 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Mon, 15 Apr 2024 22:12:36 -0700 Subject: [PATCH 02/15] Test more metrics --- pom.xml | 5 ++ .../OpenTelemetryServiceTest.java | 51 ++++++++++++++++++- 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 8a43e536cdb03..dde1c90a0f01f 100644 --- a/pom.xml +++ b/pom.xml @@ -1501,6 +1501,11 @@ flexible messaging model and an intuitive client API. opentelemetry-resources ${opentelemetry.instrumentation.version} + + io.opentelemetry.instrumentation + opentelemetry-runtime-telemetry-java17 + ${opentelemetry.instrumentation.version} + io.opentelemetry.semconv opentelemetry-semconv diff --git a/pulsar-opentelemetry/src/test/java/org/apache/pulsar/opentelemetry/OpenTelemetryServiceTest.java b/pulsar-opentelemetry/src/test/java/org/apache/pulsar/opentelemetry/OpenTelemetryServiceTest.java index 561b38a5b403c..43cf67e93fb80 100644 --- a/pulsar-opentelemetry/src/test/java/org/apache/pulsar/opentelemetry/OpenTelemetryServiceTest.java +++ b/pulsar-opentelemetry/src/test/java/org/apache/pulsar/opentelemetry/OpenTelemetryServiceTest.java @@ -28,9 +28,11 @@ import io.opentelemetry.instrumentation.runtimemetrics.java17.RuntimeMetrics; import io.opentelemetry.sdk.autoconfigure.AutoConfiguredOpenTelemetrySdkBuilder; import io.opentelemetry.sdk.common.InstrumentationScopeInfo; +import io.opentelemetry.sdk.metrics.data.MetricData; import io.opentelemetry.sdk.metrics.export.MetricReader; import io.opentelemetry.sdk.testing.exporter.InMemoryMetricReader; import io.opentelemetry.semconv.ResourceAttributes; +import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; @@ -205,9 +207,54 @@ public void testJvmRuntimeMetrics() { var otel = openTelemetryService.getOpenTelemetry(); @Cleanup - var runtimeMetrics = RuntimeMetrics.builder(otel).enableAllFeatures().enableExperimentalJmxTelemetry().build(); + var runtimeMetrics = RuntimeMetrics.builder(otel) + .enableAllFeatures() + .enableExperimentalJmxTelemetry() + .build(); + + // Attempt collection of GC metrics + Runtime.getRuntime().gc(); + + var metrics = reader.collectAllMetrics().stream().sorted(Comparator.comparing(MetricData::getName)).toList(); + + // Process Metrics + // Replaces process_cpu_seconds_total + assertThat(metrics).anySatisfy(metric -> assertThat(metric).hasName("jvm.cpu.time")); - var metrics = reader.collectAllMetrics(); + // Memory Metrics + // Replaces jvm_memory_bytes_used + assertThat(metrics).anySatisfy(metric -> assertThat(metric).hasName("jvm.memory.used")); + // Replaces jvm_memory_bytes_committed + assertThat(metrics).anySatisfy(metric -> assertThat(metric).hasName("jvm.memory.committed")); + // Replaces jvm_memory_bytes_max + assertThat(metrics).anySatisfy(metric -> assertThat(metric).hasName("jvm.memory.limit")); + // Replaces jvm_memory_bytes_init + assertThat(metrics).anySatisfy(metric -> assertThat(metric).hasName("jvm.memory.init")); + // Replaces jvm_memory_pool_allocated_bytes_total + assertThat(metrics).anySatisfy(metric -> assertThat(metric).hasName("jvm.memory.used_after_last_gc")); + + // Buffer Pool Metrics + // Replaces jvm_buffer_pool_used_bytes + assertThat(metrics).anySatisfy(metric -> assertThat(metric).hasName("jvm.buffer.memory.usage")); + // Replaces jvm_buffer_pool_capacity_bytes + assertThat(metrics).anySatisfy(metric -> assertThat(metric).hasName("jvm.buffer.memory.limit")); + // Replaces jvm_buffer_pool_used_buffers + assertThat(metrics).anySatisfy(metric -> assertThat(metric).hasName("jvm.buffer.count")); + + // Garbage Collector Metrics + // Replaces jvm_gc_collection_seconds + assertThat(metrics).anySatisfy(metric -> assertThat(metric).hasName("jvm.gc.duration")); + + // Thread Metrics + // Replaces jvm_threads_state, jvm_threads_current and jvm_threads_daemon assertThat(metrics).anySatisfy(metric -> assertThat(metric).hasName("jvm.thread.count")); + + // Class Loading Metrics + // Replaces jvm_classes_currently_loaded + assertThat(metrics).anySatisfy(metric -> assertThat(metric).hasName("jvm.class.count")); + // Replaces jvm_classes_loaded_total + assertThat(metrics).anySatisfy(metric -> assertThat(metric).hasName("jvm.class.loaded")); + // Replaces jvm_classes_unloaded_total + assertThat(metrics).anySatisfy(metric -> assertThat(metric).hasName("jvm.class.unloaded")); } } From 7aa4ea486200acf613ea9aac1380f0268ebe546e Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Fri, 19 Apr 2024 10:23:26 -0700 Subject: [PATCH 03/15] Comment test --- .../apache/pulsar/opentelemetry/OpenTelemetryServiceTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pulsar-opentelemetry/src/test/java/org/apache/pulsar/opentelemetry/OpenTelemetryServiceTest.java b/pulsar-opentelemetry/src/test/java/org/apache/pulsar/opentelemetry/OpenTelemetryServiceTest.java index 43cf67e93fb80..d5e85c4a50f76 100644 --- a/pulsar-opentelemetry/src/test/java/org/apache/pulsar/opentelemetry/OpenTelemetryServiceTest.java +++ b/pulsar-opentelemetry/src/test/java/org/apache/pulsar/opentelemetry/OpenTelemetryServiceTest.java @@ -212,7 +212,7 @@ public void testJvmRuntimeMetrics() { .enableExperimentalJmxTelemetry() .build(); - // Attempt collection of GC metrics + // Attempt collection of GC metrics. The metrics should be populated regardless if GC is triggered or not. Runtime.getRuntime().gc(); var metrics = reader.collectAllMetrics().stream().sorted(Comparator.comparing(MetricData::getName)).toList(); From d1c0ec4eeedd3e7a4f377b833bcda4e29a59aac4 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Thu, 25 Apr 2024 10:09:13 -0700 Subject: [PATCH 04/15] Move RuntimeMetrics instance to OpenTelemetryService class --- .../stats/PulsarBrokerOpenTelemetry.java | 19 ------------------- .../opentelemetry/OpenTelemetryService.java | 9 +++++++++ 2 files changed, 9 insertions(+), 19 deletions(-) diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/PulsarBrokerOpenTelemetry.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/PulsarBrokerOpenTelemetry.java index 8052bc436208e..0bbfd9c76f8be 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/PulsarBrokerOpenTelemetry.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/PulsarBrokerOpenTelemetry.java @@ -20,7 +20,6 @@ import com.google.common.annotations.VisibleForTesting; import io.opentelemetry.api.metrics.Meter; -import io.opentelemetry.instrumentation.runtimemetrics.java17.RuntimeMetrics; import io.opentelemetry.sdk.autoconfigure.AutoConfiguredOpenTelemetrySdkBuilder; import java.io.Closeable; import java.util.function.Consumer; @@ -37,8 +36,6 @@ public class PulsarBrokerOpenTelemetry implements Closeable { @Getter private final Meter meter; - private final RuntimeMetrics runtimeMetrics; - public PulsarBrokerOpenTelemetry(ServiceConfiguration config, @VisibleForTesting Consumer builderCustomizer) { openTelemetryService = OpenTelemetryService.builder() @@ -50,26 +47,10 @@ public PulsarBrokerOpenTelemetry(ServiceConfiguration config, var openTelemetry = openTelemetryService.getOpenTelemetry(); meter = openTelemetry.getMeter("org.apache.pulsar.broker"); - - // For a list of exposed metrics, see https://opentelemetry.io/docs/specs/semconv/runtime/jvm-metrics/ - runtimeMetrics = RuntimeMetrics.builder(openTelemetry) - .enableAllFeatures() - .enableExperimentalJmxTelemetry() - .build(); - - /* - BufferPools.registerObservers(openTelemetry); - Classes.registerObservers(openTelemetry); - Cpu.registerObservers(openTelemetry); - GarbageCollector.registerObservers(openTelemetry); - MemoryPools.registerObservers(openTelemetry); - Threads.registerObservers(openTelemetry); - */ } @Override public void close() { - runtimeMetrics.close(); openTelemetryService.close(); } } diff --git a/pulsar-opentelemetry/src/main/java/org/apache/pulsar/opentelemetry/OpenTelemetryService.java b/pulsar-opentelemetry/src/main/java/org/apache/pulsar/opentelemetry/OpenTelemetryService.java index 16c4264be6d12..37d2256df4b42 100644 --- a/pulsar-opentelemetry/src/main/java/org/apache/pulsar/opentelemetry/OpenTelemetryService.java +++ b/pulsar-opentelemetry/src/main/java/org/apache/pulsar/opentelemetry/OpenTelemetryService.java @@ -21,6 +21,7 @@ import static com.google.common.base.Preconditions.checkArgument; import com.google.common.annotations.VisibleForTesting; import io.opentelemetry.api.OpenTelemetry; +import io.opentelemetry.instrumentation.runtimemetrics.java17.RuntimeMetrics; import io.opentelemetry.sdk.OpenTelemetrySdk; import io.opentelemetry.sdk.autoconfigure.AutoConfiguredOpenTelemetrySdk; import io.opentelemetry.sdk.autoconfigure.AutoConfiguredOpenTelemetrySdkBuilder; @@ -44,6 +45,8 @@ public class OpenTelemetryService implements Closeable { private final OpenTelemetrySdk openTelemetrySdk; + private final RuntimeMetrics runtimeMetrics; + /** * Instantiates the OpenTelemetry SDK. All attributes are overridden by system properties or environment * variables. @@ -95,6 +98,9 @@ public OpenTelemetryService(String clusterName, } openTelemetrySdk = sdkBuilder.build().getOpenTelemetrySdk(); + + // For a list of exposed metrics, see https://opentelemetry.io/docs/specs/semconv/runtime/jvm-metrics/ + runtimeMetrics = RuntimeMetrics.builder(openTelemetrySdk).enableAllFeatures().build(); } public OpenTelemetry getOpenTelemetry() { @@ -103,6 +109,9 @@ public OpenTelemetry getOpenTelemetry() { @Override public void close() { + if (runtimeMetrics != null) { + runtimeMetrics.close(); + } openTelemetrySdk.close(); } } From 209f75fe3776d07e0be5ae42dce32cd41b4fbbab Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Thu, 25 Apr 2024 10:18:19 -0700 Subject: [PATCH 05/15] Enable experimental JMX metrics --- .../apache/pulsar/opentelemetry/OpenTelemetryService.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pulsar-opentelemetry/src/main/java/org/apache/pulsar/opentelemetry/OpenTelemetryService.java b/pulsar-opentelemetry/src/main/java/org/apache/pulsar/opentelemetry/OpenTelemetryService.java index 37d2256df4b42..14fdc2965bebb 100644 --- a/pulsar-opentelemetry/src/main/java/org/apache/pulsar/opentelemetry/OpenTelemetryService.java +++ b/pulsar-opentelemetry/src/main/java/org/apache/pulsar/opentelemetry/OpenTelemetryService.java @@ -100,7 +100,10 @@ public OpenTelemetryService(String clusterName, openTelemetrySdk = sdkBuilder.build().getOpenTelemetrySdk(); // For a list of exposed metrics, see https://opentelemetry.io/docs/specs/semconv/runtime/jvm-metrics/ - runtimeMetrics = RuntimeMetrics.builder(openTelemetrySdk).enableAllFeatures().build(); + runtimeMetrics = RuntimeMetrics.builder(openTelemetrySdk) + .enableAllFeatures() + .enableExperimentalJmxTelemetry() + .build(); } public OpenTelemetry getOpenTelemetry() { From 48ef97819a0792164e13f18cd4dbdea39a377d05 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Thu, 25 Apr 2024 10:18:36 -0700 Subject: [PATCH 06/15] Test Cleanup --- .../opentelemetry/OpenTelemetryServiceTest.java | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/pulsar-opentelemetry/src/test/java/org/apache/pulsar/opentelemetry/OpenTelemetryServiceTest.java b/pulsar-opentelemetry/src/test/java/org/apache/pulsar/opentelemetry/OpenTelemetryServiceTest.java index d5e85c4a50f76..1273ff6a9305c 100644 --- a/pulsar-opentelemetry/src/test/java/org/apache/pulsar/opentelemetry/OpenTelemetryServiceTest.java +++ b/pulsar-opentelemetry/src/test/java/org/apache/pulsar/opentelemetry/OpenTelemetryServiceTest.java @@ -25,7 +25,6 @@ import io.opentelemetry.api.metrics.LongCounterBuilder; import io.opentelemetry.api.metrics.Meter; import io.opentelemetry.instrumentation.resources.JarServiceNameDetector; -import io.opentelemetry.instrumentation.runtimemetrics.java17.RuntimeMetrics; import io.opentelemetry.sdk.autoconfigure.AutoConfiguredOpenTelemetrySdkBuilder; import io.opentelemetry.sdk.common.InstrumentationScopeInfo; import io.opentelemetry.sdk.metrics.data.MetricData; @@ -204,18 +203,10 @@ public void testServiceIsDisabledByDefault() throws Exception { @Test public void testJvmRuntimeMetrics() { - var otel = openTelemetryService.getOpenTelemetry(); - - @Cleanup - var runtimeMetrics = RuntimeMetrics.builder(otel) - .enableAllFeatures() - .enableExperimentalJmxTelemetry() - .build(); - // Attempt collection of GC metrics. The metrics should be populated regardless if GC is triggered or not. Runtime.getRuntime().gc(); - var metrics = reader.collectAllMetrics().stream().sorted(Comparator.comparing(MetricData::getName)).toList(); + var metrics = reader.collectAllMetrics(); // Process Metrics // Replaces process_cpu_seconds_total From 7c3581de0649066e9a08356a925ea10fe541d1a2 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Thu, 25 Apr 2024 10:36:02 -0700 Subject: [PATCH 07/15] Cleanup --- conf/pulsar_env.sh | 8 ++++---- pulsar-broker/pom.xml | 1 - .../pulsar/broker/stats/PulsarBrokerOpenTelemetry.java | 4 +--- pulsar-opentelemetry/pom.xml | 2 -- .../pulsar/opentelemetry/OpenTelemetryServiceTest.java | 2 -- 5 files changed, 5 insertions(+), 12 deletions(-) diff --git a/conf/pulsar_env.sh b/conf/pulsar_env.sh index 5a2a142f3d868..e046ce244ec2a 100755 --- a/conf/pulsar_env.sh +++ b/conf/pulsar_env.sh @@ -94,7 +94,7 @@ PULSAR_EXTRA_OPTS="${PULSAR_EXTRA_OPTS:-" -Dpulsar.allocator.exit_on_oom=true -D #Wait time before forcefully kill the pulsar server instance, if the stop is not successful #PULSAR_STOP_TIMEOUT= -# Enable semantically stably telemetry for JVM metrics -OTEL_INSTRUMENTATION_RUNTIME_TELEMETRY_JAVA17_ENABLE_ALL=true -OTEL_INSTRUMENTATION_RUNTIME_TELEMETRY_EMIT_EXPERIMENTAL_TELEMETRY=true -OTEL_SEMCONV_STABILITY_OPT_IN=true \ No newline at end of file +# Enable semantically stable telemetry for JVM metrics, unless otherwise overridden by the user. +if [ -z "$OTEL_SEMCONV_STABILITY_OPT_IN" ]; then + OTEL_SEMCONV_STABILITY_OPT_IN=jvm +fi diff --git a/pulsar-broker/pom.xml b/pulsar-broker/pom.xml index 8d41fbae3e583..3548877912199 100644 --- a/pulsar-broker/pom.xml +++ b/pulsar-broker/pom.xml @@ -623,7 +623,6 @@ org.apache.pulsar.metadata.bookkeeper.PulsarMetadataBookieDriver org.apache.pulsar.metadata.bookkeeper.PulsarMetadataClientDriver - jvm diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/PulsarBrokerOpenTelemetry.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/PulsarBrokerOpenTelemetry.java index 0bbfd9c76f8be..01ca65d2cc537 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/PulsarBrokerOpenTelemetry.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/stats/PulsarBrokerOpenTelemetry.java @@ -44,9 +44,7 @@ public PulsarBrokerOpenTelemetry(ServiceConfiguration config, .serviceVersion(PulsarVersion.getVersion()) .builderCustomizer(builderCustomizer) .build(); - var openTelemetry = openTelemetryService.getOpenTelemetry(); - - meter = openTelemetry.getMeter("org.apache.pulsar.broker"); + meter = openTelemetryService.getOpenTelemetry().getMeter("org.apache.pulsar.broker"); } @Override diff --git a/pulsar-opentelemetry/pom.xml b/pulsar-opentelemetry/pom.xml index f0a835e047f60..e32f1b81ff964 100644 --- a/pulsar-opentelemetry/pom.xml +++ b/pulsar-opentelemetry/pom.xml @@ -140,8 +140,6 @@ maven-surefire-plugin - true - true jvm diff --git a/pulsar-opentelemetry/src/test/java/org/apache/pulsar/opentelemetry/OpenTelemetryServiceTest.java b/pulsar-opentelemetry/src/test/java/org/apache/pulsar/opentelemetry/OpenTelemetryServiceTest.java index 1273ff6a9305c..31a6c60f83afe 100644 --- a/pulsar-opentelemetry/src/test/java/org/apache/pulsar/opentelemetry/OpenTelemetryServiceTest.java +++ b/pulsar-opentelemetry/src/test/java/org/apache/pulsar/opentelemetry/OpenTelemetryServiceTest.java @@ -27,11 +27,9 @@ import io.opentelemetry.instrumentation.resources.JarServiceNameDetector; import io.opentelemetry.sdk.autoconfigure.AutoConfiguredOpenTelemetrySdkBuilder; import io.opentelemetry.sdk.common.InstrumentationScopeInfo; -import io.opentelemetry.sdk.metrics.data.MetricData; import io.opentelemetry.sdk.metrics.export.MetricReader; import io.opentelemetry.sdk.testing.exporter.InMemoryMetricReader; import io.opentelemetry.semconv.ResourceAttributes; -import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; From 7c212886c13c4c4ccef0808eae770f2f331432cb Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Thu, 25 Apr 2024 12:27:52 -0700 Subject: [PATCH 08/15] Allow parsing of scientific notation values in PrometheusMetricsClient --- .../pulsar/broker/stats/prometheus/PrometheusMetricsClient.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pulsar-broker-common/src/test/java/org/apache/pulsar/broker/stats/prometheus/PrometheusMetricsClient.java b/pulsar-broker-common/src/test/java/org/apache/pulsar/broker/stats/prometheus/PrometheusMetricsClient.java index 6fd509690278d..6d724c289b52c 100644 --- a/pulsar-broker-common/src/test/java/org/apache/pulsar/broker/stats/prometheus/PrometheusMetricsClient.java +++ b/pulsar-broker-common/src/test/java/org/apache/pulsar/broker/stats/prometheus/PrometheusMetricsClient.java @@ -59,7 +59,7 @@ public static Multimap parseMetrics(String metrics) { // or // pulsar_subscriptions_count{cluster="standalone", namespace="public/default", // topic="persistent://public/default/test-2"} 0.0 - Pattern pattern = Pattern.compile("^(\\w+)\\{([^}]+)}\\s([+-]?[\\d\\w.-]+)$"); + Pattern pattern = Pattern.compile("^(\\w+)\\{([^}]+)}\\s([+-]?[\\d\\w.+-]+)$"); Pattern tagsPattern = Pattern.compile("(\\w+)=\"([^\"]+)\"(,\\s?)?"); Splitter.on("\n").split(metrics).forEach(line -> { From 5b5b1595a19b770471e8004abb48e3d8a56e0fab Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Thu, 25 Apr 2024 15:14:15 -0700 Subject: [PATCH 09/15] Fix license check --- distribution/server/src/assemble/LICENSE.bin.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/distribution/server/src/assemble/LICENSE.bin.txt b/distribution/server/src/assemble/LICENSE.bin.txt index c5c243796b6f3..bc7ad410966e5 100644 --- a/distribution/server/src/assemble/LICENSE.bin.txt +++ b/distribution/server/src/assemble/LICENSE.bin.txt @@ -543,6 +543,8 @@ The Apache Software License, Version 2.0 - io.opentelemetry.instrumentation-opentelemetry-instrumentation-api-1.32.1.jar - io.opentelemetry.instrumentation-opentelemetry-instrumentation-api-semconv-1.32.1-alpha.jar - io.opentelemetry.instrumentation-opentelemetry-resources-1.32.1-alpha.jar + - io.opentelemetry.instrumentation-opentelemetry-runtime-telemetry-java17-1.32.1-alpha.jar + - io.opentelemetry.instrumentation-opentelemetry-runtime-telemetry-java8-1.32.1-alpha.jar - io.opentelemetry.semconv-opentelemetry-semconv-1.23.1-alpha.jar BSD 3-clause "New" or "Revised" License From bfd70226ce71b265fa0c1cb162de34c78ae4a951 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Mon, 6 May 2024 09:17:58 -0700 Subject: [PATCH 10/15] Update library versions --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 3c817e1baffa2..4a025a40852ce 100644 --- a/pom.xml +++ b/pom.xml @@ -1513,7 +1513,7 @@ flexible messaging model and an intuitive client API. io.opentelemetry.instrumentation opentelemetry-runtime-telemetry-java17 - ${opentelemetry.instrumentation.version} + ${opentelemetry.instrumentation.alpha.version} io.opentelemetry.semconv From a0fb5b6327f5faba4da1e2599982a5ebacac5ef7 Mon Sep 17 00:00:00 2001 From: Lari Hotari Date: Mon, 6 May 2024 20:15:34 +0300 Subject: [PATCH 11/15] [fix][test] Clear MockedPulsarServiceBaseTest fields to prevent test runtime memory leak --- .../pulsar/broker/auth/MockedPulsarServiceBaseTest.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pulsar-broker/src/test/java/org/apache/pulsar/broker/auth/MockedPulsarServiceBaseTest.java b/pulsar-broker/src/test/java/org/apache/pulsar/broker/auth/MockedPulsarServiceBaseTest.java index 10d56ce2245f9..eef4469aa95fa 100644 --- a/pulsar-broker/src/test/java/org/apache/pulsar/broker/auth/MockedPulsarServiceBaseTest.java +++ b/pulsar-broker/src/test/java/org/apache/pulsar/broker/auth/MockedPulsarServiceBaseTest.java @@ -277,15 +277,22 @@ protected final void internalCleanup() throws Exception { } if (brokerGateway != null) { brokerGateway.close(); + brokerGateway = null; } if (pulsarTestContext != null) { pulsarTestContext.close(); pulsarTestContext = null; } + resetConfig(); callCloseables(closeables); closeables.clear(); onCleanup(); + + // clear fields to avoid test runtime memory leak, pulsarTestContext already handles closing of these instances + pulsar = null; + mockZooKeeper = null; + mockZooKeeperGlobal = null; } protected void closeAdmin() { From a7e04800b6bfe7247d97e6197524f53ecd88dc4b Mon Sep 17 00:00:00 2001 From: Lari Hotari Date: Mon, 6 May 2024 22:07:15 +0300 Subject: [PATCH 12/15] Drop Otel instance references at closing time to fix test runtime OOM issues caused by TestNG memory leaks --- .../opentelemetry/OpenTelemetryService.java | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/pulsar-opentelemetry/src/main/java/org/apache/pulsar/opentelemetry/OpenTelemetryService.java b/pulsar-opentelemetry/src/main/java/org/apache/pulsar/opentelemetry/OpenTelemetryService.java index 14fdc2965bebb..4560d3813d6dd 100644 --- a/pulsar-opentelemetry/src/main/java/org/apache/pulsar/opentelemetry/OpenTelemetryService.java +++ b/pulsar-opentelemetry/src/main/java/org/apache/pulsar/opentelemetry/OpenTelemetryService.java @@ -30,6 +30,7 @@ import java.io.Closeable; import java.util.Map; import java.util.Objects; +import java.util.concurrent.atomic.AtomicReference; import java.util.function.Consumer; import lombok.Builder; import org.apache.commons.lang3.StringUtils; @@ -43,9 +44,9 @@ public class OpenTelemetryService implements Closeable { public static final String OTEL_SDK_DISABLED_KEY = "otel.sdk.disabled"; static final int MAX_CARDINALITY_LIMIT = 10000; - private final OpenTelemetrySdk openTelemetrySdk; + private final AtomicReference openTelemetrySdkReference = new AtomicReference<>(); - private final RuntimeMetrics runtimeMetrics; + private final AtomicReference runtimeMetricsReference = new AtomicReference<>(); /** * Instantiates the OpenTelemetry SDK. All attributes are overridden by system properties or environment @@ -97,24 +98,28 @@ public OpenTelemetryService(String clusterName, builderCustomizer.accept(sdkBuilder); } - openTelemetrySdk = sdkBuilder.build().getOpenTelemetrySdk(); + openTelemetrySdkReference.set(sdkBuilder.build().getOpenTelemetrySdk()); // For a list of exposed metrics, see https://opentelemetry.io/docs/specs/semconv/runtime/jvm-metrics/ - runtimeMetrics = RuntimeMetrics.builder(openTelemetrySdk) + runtimeMetricsReference.set(RuntimeMetrics.builder(openTelemetrySdkReference.get()) .enableAllFeatures() .enableExperimentalJmxTelemetry() - .build(); + .build()); } public OpenTelemetry getOpenTelemetry() { - return openTelemetrySdk; + return openTelemetrySdkReference.get(); } @Override public void close() { + RuntimeMetrics runtimeMetrics = runtimeMetricsReference.getAndSet(null); if (runtimeMetrics != null) { runtimeMetrics.close(); } - openTelemetrySdk.close(); + OpenTelemetrySdk openTelemetrySdk = openTelemetrySdkReference.getAndSet(null); + if (openTelemetrySdk != null) { + openTelemetrySdk.close(); + } } } From 65031f4c0f9cd8fdeddd6cff07d6f0a3ea85612f Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Mon, 6 May 2024 12:59:25 -0700 Subject: [PATCH 13/15] Update conf/pulsar_env.sh Co-authored-by: Lari Hotari --- conf/pulsar_env.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/pulsar_env.sh b/conf/pulsar_env.sh index e046ce244ec2a..3a069e31fdc90 100755 --- a/conf/pulsar_env.sh +++ b/conf/pulsar_env.sh @@ -96,5 +96,5 @@ PULSAR_EXTRA_OPTS="${PULSAR_EXTRA_OPTS:-" -Dpulsar.allocator.exit_on_oom=true -D # Enable semantically stable telemetry for JVM metrics, unless otherwise overridden by the user. if [ -z "$OTEL_SEMCONV_STABILITY_OPT_IN" ]; then - OTEL_SEMCONV_STABILITY_OPT_IN=jvm + export OTEL_SEMCONV_STABILITY_OPT_IN=jvm fi From 75a1af627b6d35d6c545d9bb2e51054c86a6f002 Mon Sep 17 00:00:00 2001 From: Dragos Misca Date: Mon, 6 May 2024 13:31:34 -0700 Subject: [PATCH 14/15] Remove extra dependency to opentelemetry-runtime-telemetry-java17 in main pom.xml --- pom.xml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pom.xml b/pom.xml index 4a025a40852ce..8f7ae2ed1fc68 100644 --- a/pom.xml +++ b/pom.xml @@ -1510,11 +1510,6 @@ flexible messaging model and an intuitive client API. pom import - - io.opentelemetry.instrumentation - opentelemetry-runtime-telemetry-java17 - ${opentelemetry.instrumentation.alpha.version} - io.opentelemetry.semconv opentelemetry-semconv From 93241d2716de17db2397f05e7220acf76a195f78 Mon Sep 17 00:00:00 2001 From: Lari Hotari Date: Tue, 7 May 2024 10:52:01 +0300 Subject: [PATCH 15/15] Capture heap dump on OOM in integration tests --- tests/docker-images/latest-version-image/conf/bookie.conf | 2 +- tests/docker-images/latest-version-image/conf/broker.conf | 2 +- tests/docker-images/latest-version-image/conf/proxy.conf | 2 +- tests/docker-images/latest-version-image/conf/websocket.conf | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/docker-images/latest-version-image/conf/bookie.conf b/tests/docker-images/latest-version-image/conf/bookie.conf index 07547bcaef6d3..457c482d9723e 100644 --- a/tests/docker-images/latest-version-image/conf/bookie.conf +++ b/tests/docker-images/latest-version-image/conf/bookie.conf @@ -22,7 +22,7 @@ autostart=false redirect_stderr=true stdout_logfile=/var/log/pulsar/bookie.log directory=/pulsar -environment=PULSAR_MEM="-Xmx128M -XX:MaxDirectMemorySize=512M",PULSAR_GC="-XX:+UseZGC" +environment=PULSAR_MEM="-Xmx128M -XX:MaxDirectMemorySize=512M -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/var/log/pulsar",PULSAR_GC="-XX:+UseZGC" command=/pulsar/bin/pulsar bookie user=pulsar stopwaitsecs=15 diff --git a/tests/docker-images/latest-version-image/conf/broker.conf b/tests/docker-images/latest-version-image/conf/broker.conf index 63be36437741b..3c2dc6caf99d4 100644 --- a/tests/docker-images/latest-version-image/conf/broker.conf +++ b/tests/docker-images/latest-version-image/conf/broker.conf @@ -22,7 +22,7 @@ autostart=false redirect_stderr=true stdout_logfile=/var/log/pulsar/broker.log directory=/pulsar -environment=PULSAR_MEM="-Xmx128M",PULSAR_GC="-XX:+UseZGC" +environment=PULSAR_MEM="-Xmx128M -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/var/log/pulsar",PULSAR_GC="-XX:+UseZGC" command=/pulsar/bin/pulsar broker user=pulsar stopwaitsecs=15 diff --git a/tests/docker-images/latest-version-image/conf/proxy.conf b/tests/docker-images/latest-version-image/conf/proxy.conf index 343a0f9614e30..f18a93f8ade0c 100644 --- a/tests/docker-images/latest-version-image/conf/proxy.conf +++ b/tests/docker-images/latest-version-image/conf/proxy.conf @@ -22,7 +22,7 @@ autostart=false redirect_stderr=true stdout_logfile=/var/log/pulsar/proxy.log directory=/pulsar -environment=PULSAR_MEM="-Xmx128M",PULSAR_GC="-XX:+UseZGC" +environment=PULSAR_MEM="-Xmx128M -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/var/log/pulsar",PULSAR_GC="-XX:+UseZGC" command=/pulsar/bin/pulsar proxy user=pulsar stopwaitsecs=15 \ No newline at end of file diff --git a/tests/docker-images/latest-version-image/conf/websocket.conf b/tests/docker-images/latest-version-image/conf/websocket.conf index 0418c4cbc26a3..4a09bd97ea0b7 100644 --- a/tests/docker-images/latest-version-image/conf/websocket.conf +++ b/tests/docker-images/latest-version-image/conf/websocket.conf @@ -22,7 +22,7 @@ autostart=false redirect_stderr=true stdout_logfile=/var/log/pulsar/pulsar-websocket.log directory=/pulsar -environment=PULSAR_MEM="-Xmx128M",PULSAR_GC="-XX:+UseZGC" +environment=PULSAR_MEM="-Xmx128M -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/var/log/pulsar",PULSAR_GC="-XX:+UseZGC" command=/pulsar/bin/pulsar websocket user=pulsar stopwaitsecs=15 \ No newline at end of file