Skip to content

Commit

Permalink
allegro-internal/flex-roadmap#819 Migrated metrics to prometheus | changed metrics names [2]
Browse files (browse the repository at this point in the history)
  • Loading branch information
nastassia-dailidava committed Oct 18, 2024
1 parent 616245d commit 1f7f2bc
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,17 @@ import pl.allegro.tech.servicemesh.envoycontrol.logger
import pl.allegro.tech.servicemesh.envoycontrol.services.MultiClusterState
import pl.allegro.tech.servicemesh.envoycontrol.utils.CHECKPOINT_TAG
import pl.allegro.tech.servicemesh.envoycontrol.utils.COMMUNICATION_MODE_ERROR_METRIC
import pl.allegro.tech.servicemesh.envoycontrol.utils.ERRORS_TOTAL_METRIC
import pl.allegro.tech.servicemesh.envoycontrol.utils.METRIC_EMITTER_TAG
import pl.allegro.tech.servicemesh.envoycontrol.utils.OPERATION_TAG
import pl.allegro.tech.servicemesh.envoycontrol.utils.ParallelizableScheduler
import pl.allegro.tech.servicemesh.envoycontrol.utils.SERVICES_STATE_METRIC
import pl.allegro.tech.servicemesh.envoycontrol.utils.SERVICE_TAG
import pl.allegro.tech.servicemesh.envoycontrol.utils.SIMPLE_CACHE_METRIC
import pl.allegro.tech.servicemesh.envoycontrol.utils.SNAPSHOT_ERROR_METRIC
import pl.allegro.tech.servicemesh.envoycontrol.utils.SNAPSHOT_GROUP_ERROR_METRIC
import pl.allegro.tech.servicemesh.envoycontrol.utils.SNAPSHOT_METRIC
import pl.allegro.tech.servicemesh.envoycontrol.utils.SNAPSHOT_STATUS_TAG
import pl.allegro.tech.servicemesh.envoycontrol.utils.SNAPSHOT_UPDATE_DURATION_METRIC
import pl.allegro.tech.servicemesh.envoycontrol.utils.UPDATE_TRIGGER_TAG
import pl.allegro.tech.servicemesh.envoycontrol.utils.doOnNextScheduledOn
import pl.allegro.tech.servicemesh.envoycontrol.utils.measureBuffer
Expand Down Expand Up @@ -115,7 +117,7 @@ class SnapshotUpdater(
.metrics()
.onErrorResume { e ->
meterRegistry.counter(
ERRORS_TOTAL_METRIC,
SNAPSHOT_ERROR_METRIC,
Tags.of(UPDATE_TRIGGER_TAG, "groups", METRIC_EMITTER_TAG, "snapshot-updater")
)
.increment()
Expand Down Expand Up @@ -164,7 +166,7 @@ class SnapshotUpdater(
.filter { it != emptyUpdateResult }
.onErrorResume { e ->
meterRegistry.counter(
ERRORS_TOTAL_METRIC,
SNAPSHOT_ERROR_METRIC,
Tags.of(METRIC_EMITTER_TAG, "snapshot-updater", UPDATE_TRIGGER_TAG, "services")
).increment()
logger.error("Unable to process service changes", e)
Expand All @@ -188,7 +190,7 @@ class SnapshotUpdater(
}
} catch (e: Throwable) {
meterRegistry.counter(
ERRORS_TOTAL_METRIC,
SNAPSHOT_GROUP_ERROR_METRIC,
Tags.of(
SERVICE_TAG, group.serviceName,
OPERATION_TAG, "create-snapshot",
Expand All @@ -199,7 +201,7 @@ class SnapshotUpdater(
}
}

private val updateSnapshotForGroupsTimer = meterRegistry.timer("snapshot.update.duration.seconds")
private val updateSnapshotForGroupsTimer = meterRegistry.timer(SNAPSHOT_UPDATE_DURATION_METRIC)

private fun updateSnapshotForGroups(
groups: Collection<Group>,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ import pl.allegro.tech.servicemesh.envoycontrol.utils.CLUSTER_TAG
import pl.allegro.tech.servicemesh.envoycontrol.utils.CROSS_DC_SYNC_CANCELLED_METRIC
import pl.allegro.tech.servicemesh.envoycontrol.utils.CROSS_DC_SYNC_SECONDS_METRIC
import pl.allegro.tech.servicemesh.envoycontrol.utils.CROSS_DC_SYNC_TOTAL_METRIC
import pl.allegro.tech.servicemesh.envoycontrol.utils.ERRORS_TOTAL_METRIC
import pl.allegro.tech.servicemesh.envoycontrol.utils.OPERATION_TAG
import pl.allegro.tech.servicemesh.envoycontrol.utils.METRIC_EMITTER_TAG
import pl.allegro.tech.servicemesh.envoycontrol.utils.OPERATION_TAG
import pl.allegro.tech.servicemesh.envoycontrol.utils.SERVICES_STATE_ERRORS_METRIC
import reactor.core.publisher.Flux
import reactor.core.publisher.FluxSink
import java.lang.Integer.max
Expand Down Expand Up @@ -72,7 +72,7 @@ class RemoteServices(
.orTimeout(interval, TimeUnit.SECONDS)
.exceptionally {
meterRegistry.counter(
ERRORS_TOTAL_METRIC,
SERVICES_STATE_ERRORS_METRIC,
Tags.of(
CLUSTER_TAG, cluster,
OPERATION_TAG, "get-state",
Expand All @@ -90,7 +90,7 @@ class RemoteServices(
cluster to instances
} catch (e: Exception) {
meterRegistry.counter(
ERRORS_TOTAL_METRIC,
SERVICES_STATE_ERRORS_METRIC,
Tags.of(
CLUSTER_TAG, cluster,
OPERATION_TAG, "get-instances",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,16 @@ import io.micrometer.core.instrument.noop.NoopTimer
val noopTimer = NoopTimer(Meter.Id("", Tags.empty(), null, null, Meter.Type.TIMER))
const val REACTOR_METRIC = "reactor.stats"
const val SERVICES_STATE_METRIC = "services.state"
const val SERVICES_STATE_ERRORS_METRIC = "services.state.errors"
const val SNAPSHOT_METRIC = "snapshot"
const val ERRORS_TOTAL_METRIC = "errors.total"
const val SNAPSHOT_UPDATE_DURATION_METRIC = "snapshot.update.duration.seconds"
const val SNAPSHOT_ERROR_METRIC = "snapshot.errors"
const val SNAPSHOT_GROUP_ERROR_METRIC = "snapshot.group.errors"
const val WATCH_ERRORS_METRIC = "watch.errors.total"
const val COMMUNICATION_MODE_ERROR_METRIC = "communication.errors.total"
const val CONNECTIONS_METRIC = "connections"
const val CONNECTIONS_METRIC = "connections.stats"
const val REQUESTS_METRIC = "stream.requests"
const val WATCH_METRIC = "watch"
const val WATCH_METRIC = "watch.stats"
const val ENVOY_CONTROL_WARM_UP_METRIC = "envoy.control.warmup.seconds"
const val CROSS_DC_SYNC_METRIC = "cross.dc.synchronization"
const val CROSS_DC_SYNC_CANCELLED_METRIC = "$CROSS_DC_SYNC_METRIC.cancelled.total"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,13 @@ import pl.allegro.tech.servicemesh.envoycontrol.snapshot.resource.routes.EnvoyIn
import pl.allegro.tech.servicemesh.envoycontrol.snapshot.resource.routes.RequestPolicyMapper
import pl.allegro.tech.servicemesh.envoycontrol.snapshot.resource.routes.ServiceTagMetadataGenerator
import pl.allegro.tech.servicemesh.envoycontrol.utils.DirectScheduler
import pl.allegro.tech.servicemesh.envoycontrol.utils.ParallelScheduler
import pl.allegro.tech.servicemesh.envoycontrol.utils.ParallelizableScheduler
import pl.allegro.tech.servicemesh.envoycontrol.utils.any
import pl.allegro.tech.servicemesh.envoycontrol.utils.ERRORS_TOTAL_METRIC
import pl.allegro.tech.servicemesh.envoycontrol.utils.METRIC_EMITTER_TAG
import pl.allegro.tech.servicemesh.envoycontrol.utils.OPERATION_TAG
import pl.allegro.tech.servicemesh.envoycontrol.utils.ParallelScheduler
import pl.allegro.tech.servicemesh.envoycontrol.utils.ParallelizableScheduler
import pl.allegro.tech.servicemesh.envoycontrol.utils.SERVICE_TAG
import pl.allegro.tech.servicemesh.envoycontrol.utils.SNAPSHOT_GROUP_ERROR_METRIC
import pl.allegro.tech.servicemesh.envoycontrol.utils.any
import reactor.core.publisher.Flux
import reactor.core.publisher.Mono
import reactor.core.scheduler.Schedulers
Expand Down Expand Up @@ -473,8 +473,14 @@ class SnapshotUpdaterTest {
val snapshot = cache.getSnapshot(servicesGroup)
assertThat(snapshot).isEqualTo(null)
assertThat(
simpleMeterRegistry.find(ERRORS_TOTAL_METRIC)
.tags(Tags.of(SERVICE_TAG, "example-service", OPERATION_TAG, "create-snapshot", METRIC_EMITTER_TAG, "snapshot-updater"))
simpleMeterRegistry.find(SNAPSHOT_GROUP_ERROR_METRIC)
.tags(
Tags.of(
SERVICE_TAG, "example-service",
OPERATION_TAG, "create-snapshot",
METRIC_EMITTER_TAG, "snapshot-updater"
)
)
.counter()?.count()
).isEqualTo(1.0)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ import pl.allegro.tech.servicemesh.envoycontrol.services.transformers.ServiceIns
import pl.allegro.tech.servicemesh.envoycontrol.snapshot.resource.listeners.filters.EnvoyHttpFilters
import pl.allegro.tech.servicemesh.envoycontrol.synchronization.GlobalStateChanges
import pl.allegro.tech.servicemesh.envoycontrol.utils.CACHE_GROUP_COUNT_METRIC
import pl.allegro.tech.servicemesh.envoycontrol.utils.ERRORS_TOTAL_METRIC
import pl.allegro.tech.servicemesh.envoycontrol.utils.WATCH_ERRORS_METRIC
import pl.allegro.tech.servicemesh.envoycontrol.utils.METRIC_EMITTER_TAG
import pl.allegro.tech.servicemesh.envoycontrol.utils.STATUS_TAG
import pl.allegro.tech.servicemesh.envoycontrol.utils.WATCH_METRIC
Expand Down Expand Up @@ -198,8 +198,8 @@ class ControlPlaneConfig {
)
meterRegistry.gauge(CACHE_GROUP_COUNT_METRIC, it.cacheGroupsCount)
it.meterRegistry.more().counter(
ERRORS_TOTAL_METRIC,
Tags.of(METRIC_EMITTER_TAG, WATCH_METRIC, WATCH_TYPE_TAG, "service"),
WATCH_ERRORS_METRIC,
Tags.of(METRIC_EMITTER_TAG, "watch-services"),
it.errorWatchingServices
)
}
Expand Down

0 comments on commit 1f7f2bc

Please sign in to comment.