From d75998fdd3fcb5a3ecc0ccfc3e47a8bc7f97c33b Mon Sep 17 00:00:00 2001 From: Vedant Gupta <49195734+veds-g@users.noreply.github.com> Date: Mon, 16 Dec 2024 03:38:59 +0530 Subject: [PATCH] feat: pipeline gauge metrics (#2284) Signed-off-by: veds-g --- .../namespaced-numaflow-server.yaml | 87 +++++++++++------ config/advanced-install/numaflow-server.yaml | 87 +++++++++++------ .../numaflow-server-metrics-proxy-config.yaml | 87 +++++++++++------ config/install.yaml | 87 +++++++++++------ config/namespace-install.yaml | 87 +++++++++++------ server/apis/v1/promql_service_test.go | 97 ++++++++++++++++++- .../PodDetails/partials/Metrics/index.tsx | 5 + .../Metrics/partials/LineChart/index.tsx | 20 +++- .../partials/Metrics/utils/constants.ts | 6 +- 9 files changed, 397 insertions(+), 166 deletions(-) diff --git a/config/advanced-install/namespaced-numaflow-server.yaml b/config/advanced-install/namespaced-numaflow-server.yaml index 9d34601fd..7cb350b07 100644 --- a/config/advanced-install/namespaced-numaflow-server.yaml +++ b/config/advanced-install/namespaced-numaflow-server.yaml @@ -143,33 +143,71 @@ data: # example for local prometheus service # url: http://prometheus-operated.monitoring.svc.cluster.local:9090 patterns: + - name: vertex_gauge + object: vertex + title: Vertex Pending Messages + description: This query is the total number of pending messages for the vertex + expr: | + sum($metric_name{$filters}) by ($dimension, period) + params: + - name: start_time + required: false + - name: end_time + required: false + metrics: + - metric_name: vertex_pending_messages + required_filters: + - namespace + - pipeline + - vertex + dimensions: + - name: pod + # expr: optional expression for prometheus query + # overrides the default expression + filters: + - name: pod + required: false + - name: period + required: false + - name: vertex + # expr: optional expression for prometheus query + # overrides the default expression + filters: + - name: period + required: false + - name: mono_vertex_gauge object: mono-vertex title: Pending Messages Lag description: This query is the total number of pending messages for the mono vertex expr: | - $metric_name{$filters} + sum($metric_name{$filters}) by ($dimension, period) params: - name: start_time required: false - name: end_time required: false metrics: - - metric_name: monovtx_pending - required_filters: - - namespace - - mvtx_name - dimensions: - - name: pod - filters: - - name: pod - required: false - - name: period - required: false - - name: mono-vertex - filters: - - name: period - required: false + - metric_name: monovtx_pending + required_filters: + - namespace + - mvtx_name + dimensions: + - name: pod + # expr: optional expression for prometheus query + # overrides the default expression + filters: + - name: pod + required: false + - name: period + required: false + - name: mono-vertex + # expr: optional expression for prometheus query + # overrides the default expression + filters: + - name: period + required: false + - name: mono_vertex_histogram object: mono-vertex title: Processing Time Latency @@ -192,11 +230,7 @@ data: - mvtx_name dimensions: - name: mono-vertex - # expr: optional expression for prometheus query - # overrides the default expression - name: pod - # expr: optional expression for prometheus query - # overrides the default expression filters: - name: pod required: false @@ -206,15 +240,11 @@ data: - mvtx_name dimensions: - name: mono-vertex - # expr: optional expression for prometheus query - # overrides the default expression - name: pod - # expr: optional expression for prometheus query - # overrides the default expression filters: - name: pod required: false - # Add histogram metrics similar to the pattern above + - name: vertex_throughput object: vertex title: Vertex Throughput and Message Rates @@ -235,12 +265,11 @@ data: - vertex dimensions: - name: vertex - # expr: optional expression for prometheus query - # overrides the default expression - name: pod filters: - name: pod required: false + - name: mono_vertex_throughput object: mono-vertex title: Mono-Vertex Throughput and Message Rates @@ -260,11 +289,7 @@ data: - mvtx_name dimensions: - name: mono-vertex - # expr: optional expression for prometheus query - # overrides the default expression - name: pod - # expr: optional expression for prometheus query - # overrides the default expression filters: - name: pod required: false diff --git a/config/advanced-install/numaflow-server.yaml b/config/advanced-install/numaflow-server.yaml index bca4a13bb..fcb283f11 100644 --- a/config/advanced-install/numaflow-server.yaml +++ b/config/advanced-install/numaflow-server.yaml @@ -150,33 +150,71 @@ data: # example for local prometheus service # url: http://prometheus-operated.monitoring.svc.cluster.local:9090 patterns: + - name: vertex_gauge + object: vertex + title: Vertex Pending Messages + description: This query is the total number of pending messages for the vertex + expr: | + sum($metric_name{$filters}) by ($dimension, period) + params: + - name: start_time + required: false + - name: end_time + required: false + metrics: + - metric_name: vertex_pending_messages + required_filters: + - namespace + - pipeline + - vertex + dimensions: + - name: pod + # expr: optional expression for prometheus query + # overrides the default expression + filters: + - name: pod + required: false + - name: period + required: false + - name: vertex + # expr: optional expression for prometheus query + # overrides the default expression + filters: + - name: period + required: false + - name: mono_vertex_gauge object: mono-vertex title: Pending Messages Lag description: This query is the total number of pending messages for the mono vertex expr: | - $metric_name{$filters} + sum($metric_name{$filters}) by ($dimension, period) params: - name: start_time required: false - name: end_time required: false metrics: - - metric_name: monovtx_pending - required_filters: - - namespace - - mvtx_name - dimensions: - - name: pod - filters: - - name: pod - required: false - - name: period - required: false - - name: mono-vertex - filters: - - name: period - required: false + - metric_name: monovtx_pending + required_filters: + - namespace + - mvtx_name + dimensions: + - name: pod + # expr: optional expression for prometheus query + # overrides the default expression + filters: + - name: pod + required: false + - name: period + required: false + - name: mono-vertex + # expr: optional expression for prometheus query + # overrides the default expression + filters: + - name: period + required: false + - name: mono_vertex_histogram object: mono-vertex title: Processing Time Latency @@ -199,11 +237,7 @@ data: - mvtx_name dimensions: - name: mono-vertex - # expr: optional expression for prometheus query - # overrides the default expression - name: pod - # expr: optional expression for prometheus query - # overrides the default expression filters: - name: pod required: false @@ -213,15 +247,11 @@ data: - mvtx_name dimensions: - name: mono-vertex - # expr: optional expression for prometheus query - # overrides the default expression - name: pod - # expr: optional expression for prometheus query - # overrides the default expression filters: - name: pod required: false - # Add histogram metrics similar to the pattern above + - name: vertex_throughput object: vertex title: Vertex Throughput and Message Rates @@ -242,12 +272,11 @@ data: - vertex dimensions: - name: vertex - # expr: optional expression for prometheus query - # overrides the default expression - name: pod filters: - name: pod required: false + - name: mono_vertex_throughput object: mono-vertex title: Mono-Vertex Throughput and Message Rates @@ -267,11 +296,7 @@ data: - mvtx_name dimensions: - name: mono-vertex - # expr: optional expression for prometheus query - # overrides the default expression - name: pod - # expr: optional expression for prometheus query - # overrides the default expression filters: - name: pod required: false diff --git a/config/base/numaflow-server/numaflow-server-metrics-proxy-config.yaml b/config/base/numaflow-server/numaflow-server-metrics-proxy-config.yaml index d97370e39..fe634f5f1 100644 --- a/config/base/numaflow-server/numaflow-server-metrics-proxy-config.yaml +++ b/config/base/numaflow-server/numaflow-server-metrics-proxy-config.yaml @@ -9,33 +9,71 @@ data: # example for local prometheus service # url: http://prometheus-operated.monitoring.svc.cluster.local:9090 patterns: + - name: vertex_gauge + object: vertex + title: Vertex Pending Messages + description: This query is the total number of pending messages for the vertex + expr: | + sum($metric_name{$filters}) by ($dimension, period) + params: + - name: start_time + required: false + - name: end_time + required: false + metrics: + - metric_name: vertex_pending_messages + required_filters: + - namespace + - pipeline + - vertex + dimensions: + - name: pod + # expr: optional expression for prometheus query + # overrides the default expression + filters: + - name: pod + required: false + - name: period + required: false + - name: vertex + # expr: optional expression for prometheus query + # overrides the default expression + filters: + - name: period + required: false + - name: mono_vertex_gauge object: mono-vertex title: Pending Messages Lag description: This query is the total number of pending messages for the mono vertex expr: | - $metric_name{$filters} + sum($metric_name{$filters}) by ($dimension, period) params: - name: start_time required: false - name: end_time required: false metrics: - - metric_name: monovtx_pending - required_filters: - - namespace - - mvtx_name - dimensions: - - name: pod - filters: - - name: pod - required: false - - name: period - required: false - - name: mono-vertex - filters: - - name: period - required: false + - metric_name: monovtx_pending + required_filters: + - namespace + - mvtx_name + dimensions: + - name: pod + # expr: optional expression for prometheus query + # overrides the default expression + filters: + - name: pod + required: false + - name: period + required: false + - name: mono-vertex + # expr: optional expression for prometheus query + # overrides the default expression + filters: + - name: period + required: false + - name: mono_vertex_histogram object: mono-vertex title: Processing Time Latency @@ -58,11 +96,7 @@ data: - mvtx_name dimensions: - name: mono-vertex - # expr: optional expression for prometheus query - # overrides the default expression - name: pod - # expr: optional expression for prometheus query - # overrides the default expression filters: - name: pod required: false @@ -72,15 +106,11 @@ data: - mvtx_name dimensions: - name: mono-vertex - # expr: optional expression for prometheus query - # overrides the default expression - name: pod - # expr: optional expression for prometheus query - # overrides the default expression filters: - name: pod required: false - # Add histogram metrics similar to the pattern above + - name: vertex_throughput object: vertex title: Vertex Throughput and Message Rates @@ -101,12 +131,11 @@ data: - vertex dimensions: - name: vertex - # expr: optional expression for prometheus query - # overrides the default expression - name: pod filters: - name: pod required: false + - name: mono_vertex_throughput object: mono-vertex title: Mono-Vertex Throughput and Message Rates @@ -126,11 +155,7 @@ data: - mvtx_name dimensions: - name: mono-vertex - # expr: optional expression for prometheus query - # overrides the default expression - name: pod - # expr: optional expression for prometheus query - # overrides the default expression filters: - name: pod required: false \ No newline at end of file diff --git a/config/install.yaml b/config/install.yaml index 72df249f0..69fbd4ca4 100644 --- a/config/install.yaml +++ b/config/install.yaml @@ -28563,33 +28563,71 @@ data: # example for local prometheus service # url: http://prometheus-operated.monitoring.svc.cluster.local:9090 patterns: + - name: vertex_gauge + object: vertex + title: Vertex Pending Messages + description: This query is the total number of pending messages for the vertex + expr: | + sum($metric_name{$filters}) by ($dimension, period) + params: + - name: start_time + required: false + - name: end_time + required: false + metrics: + - metric_name: vertex_pending_messages + required_filters: + - namespace + - pipeline + - vertex + dimensions: + - name: pod + # expr: optional expression for prometheus query + # overrides the default expression + filters: + - name: pod + required: false + - name: period + required: false + - name: vertex + # expr: optional expression for prometheus query + # overrides the default expression + filters: + - name: period + required: false + - name: mono_vertex_gauge object: mono-vertex title: Pending Messages Lag description: This query is the total number of pending messages for the mono vertex expr: | - $metric_name{$filters} + sum($metric_name{$filters}) by ($dimension, period) params: - name: start_time required: false - name: end_time required: false metrics: - - metric_name: monovtx_pending - required_filters: - - namespace - - mvtx_name - dimensions: - - name: pod - filters: - - name: pod - required: false - - name: period - required: false - - name: mono-vertex - filters: - - name: period - required: false + - metric_name: monovtx_pending + required_filters: + - namespace + - mvtx_name + dimensions: + - name: pod + # expr: optional expression for prometheus query + # overrides the default expression + filters: + - name: pod + required: false + - name: period + required: false + - name: mono-vertex + # expr: optional expression for prometheus query + # overrides the default expression + filters: + - name: period + required: false + - name: mono_vertex_histogram object: mono-vertex title: Processing Time Latency @@ -28612,11 +28650,7 @@ data: - mvtx_name dimensions: - name: mono-vertex - # expr: optional expression for prometheus query - # overrides the default expression - name: pod - # expr: optional expression for prometheus query - # overrides the default expression filters: - name: pod required: false @@ -28626,15 +28660,11 @@ data: - mvtx_name dimensions: - name: mono-vertex - # expr: optional expression for prometheus query - # overrides the default expression - name: pod - # expr: optional expression for prometheus query - # overrides the default expression filters: - name: pod required: false - # Add histogram metrics similar to the pattern above + - name: vertex_throughput object: vertex title: Vertex Throughput and Message Rates @@ -28655,12 +28685,11 @@ data: - vertex dimensions: - name: vertex - # expr: optional expression for prometheus query - # overrides the default expression - name: pod filters: - name: pod required: false + - name: mono_vertex_throughput object: mono-vertex title: Mono-Vertex Throughput and Message Rates @@ -28680,11 +28709,7 @@ data: - mvtx_name dimensions: - name: mono-vertex - # expr: optional expression for prometheus query - # overrides the default expression - name: pod - # expr: optional expression for prometheus query - # overrides the default expression filters: - name: pod required: false diff --git a/config/namespace-install.yaml b/config/namespace-install.yaml index 1ae302f71..810422a7c 100644 --- a/config/namespace-install.yaml +++ b/config/namespace-install.yaml @@ -28451,33 +28451,71 @@ data: # example for local prometheus service # url: http://prometheus-operated.monitoring.svc.cluster.local:9090 patterns: + - name: vertex_gauge + object: vertex + title: Vertex Pending Messages + description: This query is the total number of pending messages for the vertex + expr: | + sum($metric_name{$filters}) by ($dimension, period) + params: + - name: start_time + required: false + - name: end_time + required: false + metrics: + - metric_name: vertex_pending_messages + required_filters: + - namespace + - pipeline + - vertex + dimensions: + - name: pod + # expr: optional expression for prometheus query + # overrides the default expression + filters: + - name: pod + required: false + - name: period + required: false + - name: vertex + # expr: optional expression for prometheus query + # overrides the default expression + filters: + - name: period + required: false + - name: mono_vertex_gauge object: mono-vertex title: Pending Messages Lag description: This query is the total number of pending messages for the mono vertex expr: | - $metric_name{$filters} + sum($metric_name{$filters}) by ($dimension, period) params: - name: start_time required: false - name: end_time required: false metrics: - - metric_name: monovtx_pending - required_filters: - - namespace - - mvtx_name - dimensions: - - name: pod - filters: - - name: pod - required: false - - name: period - required: false - - name: mono-vertex - filters: - - name: period - required: false + - metric_name: monovtx_pending + required_filters: + - namespace + - mvtx_name + dimensions: + - name: pod + # expr: optional expression for prometheus query + # overrides the default expression + filters: + - name: pod + required: false + - name: period + required: false + - name: mono-vertex + # expr: optional expression for prometheus query + # overrides the default expression + filters: + - name: period + required: false + - name: mono_vertex_histogram object: mono-vertex title: Processing Time Latency @@ -28500,11 +28538,7 @@ data: - mvtx_name dimensions: - name: mono-vertex - # expr: optional expression for prometheus query - # overrides the default expression - name: pod - # expr: optional expression for prometheus query - # overrides the default expression filters: - name: pod required: false @@ -28514,15 +28548,11 @@ data: - mvtx_name dimensions: - name: mono-vertex - # expr: optional expression for prometheus query - # overrides the default expression - name: pod - # expr: optional expression for prometheus query - # overrides the default expression filters: - name: pod required: false - # Add histogram metrics similar to the pattern above + - name: vertex_throughput object: vertex title: Vertex Throughput and Message Rates @@ -28543,12 +28573,11 @@ data: - vertex dimensions: - name: vertex - # expr: optional expression for prometheus query - # overrides the default expression - name: pod filters: - name: pod required: false + - name: mono_vertex_throughput object: mono-vertex title: Mono-Vertex Throughput and Message Rates @@ -28568,11 +28597,7 @@ data: - mvtx_name dimensions: - name: mono-vertex - # expr: optional expression for prometheus query - # overrides the default expression - name: pod - # expr: optional expression for prometheus query - # overrides the default expression filters: - name: pod required: false diff --git a/server/apis/v1/promql_service_test.go b/server/apis/v1/promql_service_test.go index 3d923bb85..54270cf0d 100644 --- a/server/apis/v1/promql_service_test.go +++ b/server/apis/v1/promql_service_test.go @@ -274,7 +274,7 @@ func Test_PromQueryBuilder(t *testing.T) { }) } - // tests for gauge metrics + // tests for mono-vertex gauge metrics var gauge_service = &PromQlService{ PlaceHolders: map[string]map[string][]string{ "monovtx_pending": { @@ -305,7 +305,7 @@ func Test_PromQueryBuilder(t *testing.T) { "period": "5m", }, }, - expectedQuery: `monovtx_pending{namespace= "test_namespace", mvtx_name= "test_mvtx", period= "5m"}`, + expectedQuery: `sum(monovtx_pending{namespace= "test_namespace", mvtx_name= "test_mvtx", period= "5m"}) by (mvtx_name, period)`, }, { name: "Missing metric name in service config", @@ -337,6 +337,72 @@ func Test_PromQueryBuilder(t *testing.T) { } }) } + + // tests for pipeline gauge metrics + var pl_gauge_service = &PromQlService{ + PlaceHolders: map[string]map[string][]string{ + "vertex_pending_messages": { + "vertex": {"$dimension", "$metric_name", "$filters"}, + }, + }, + Expression: map[string]map[string]string{ + "vertex_pending_messages": { + "vertex": "$metric_name{$filters}", + }, + }, + } + + pl_gauge_metrics_tests := []struct { + name string + requestBody MetricsRequestBody + expectedQuery string + expectError bool + }{ + { + name: "Successful pipeline gauge metrics template substitution", + requestBody: MetricsRequestBody{ + MetricName: "vertex_pending_messages", + Dimension: "vertex", + Filters: map[string]string{ + "namespace": "test_namespace", + "pipeline": "test_pipeline", + "vertex": "test_vertex", + "period": "5m", + }, + }, + expectedQuery: `sum(vertex_pending_messages{namespace= "test_namespace", pipeline= "test_pipeline", vertex= "test_vertex", period= "5m"}) by (vertex, period)`, + }, + { + name: "Missing metric name in service config", + requestBody: MetricsRequestBody{ + MetricName: "non_existent_metric", + Dimension: "mono-vertex", + Filters: map[string]string{ + "namespace": "test_namespace", + "pipeline": "test_pipeline", + "vertex": "test_vertex", + "period": "5m", + }, + }, + expectError: true, + }, + } + + for _, tt := range pl_gauge_metrics_tests { + t.Run(tt.name, func(t *testing.T) { + actualQuery, err := pl_gauge_service.BuildQuery(tt.requestBody) + if tt.expectError { + assert.Error(t, err) + } else { + assert.NoError(t, err) + if !comparePrometheusQueries(tt.expectedQuery, actualQuery) { + t.Errorf("Prometheus queries do not match.\nExpected: %s\nGot: %s", tt.expectedQuery, actualQuery) + } else { + t.Log("Prometheus queries match!") + } + } + }) + } } func Test_QueryPrometheus(t *testing.T) { @@ -386,14 +452,37 @@ func Test_QueryPrometheus(t *testing.T) { assert.Equal(t, 1, matrix.Len()) }) - t.Run("Successful gauge query", func(t *testing.T) { + t.Run("Successful mono-vertex gauge query", func(t *testing.T) { + mockAPI := &MockPrometheusAPI{} + promQlService := &PromQlService{ + PrometheusClient: &Prometheus{ + Api: mockAPI, + }, + } + query := `sum(monovtx_pending{namespace="default", mvtx_name="test-mvtx", period="5m"}) by (mvtx_name, period)` + startTime := time.Now().Add(-30 * time.Minute) + endTime := time.Now() + + ctx := context.Background() + result, err := promQlService.QueryPrometheus(ctx, query, startTime, endTime) + + assert.NoError(t, err) + assert.NotNil(t, result) + + // for query range , response should be a matrix + matrix, ok := result.(model.Matrix) + assert.True(t, ok) + assert.Equal(t, 1, matrix.Len()) + }) + + t.Run("Successful pipeline gauge query", func(t *testing.T) { mockAPI := &MockPrometheusAPI{} promQlService := &PromQlService{ PrometheusClient: &Prometheus{ Api: mockAPI, }, } - query := `monovtx_pending{namespace="default", mvtx_name="test-mvtx", pending="5m"}` + query := `sum(vertex_pending_messages{namespace="default", pipeline="test-pipeline", vertex="test-vertex", period="5m"}) by (vertex, period)` startTime := time.Now().Add(-30 * time.Minute) endTime := time.Now() diff --git a/ui/src/components/pages/Pipeline/partials/Graph/partials/NodeInfo/partials/Pods/partials/PodDetails/partials/Metrics/index.tsx b/ui/src/components/pages/Pipeline/partials/Graph/partials/NodeInfo/partials/Pods/partials/PodDetails/partials/Metrics/index.tsx index a32fec532..71f5ee862 100644 --- a/ui/src/components/pages/Pipeline/partials/Graph/partials/NodeInfo/partials/Pods/partials/PodDetails/partials/Metrics/index.tsx +++ b/ui/src/components/pages/Pipeline/partials/Graph/partials/NodeInfo/partials/Pods/partials/PodDetails/partials/Metrics/index.tsx @@ -65,6 +65,11 @@ export function Metrics({ namespaceId, pipelineId, type, vertexId }: MetricsProp return ( {discoveredMetrics?.data?.map((metric: any) => { + if ( + type === "source" && + metric?.metric_name === "vertex_pending_messages" + ) + return null; const panelId = `${metric?.metric_name}-panel`; return ( { switch (metricName) { case "monovtx_pending": - return "period"; + case "vertex_pending_messages": + return dimension === "pod" ? ["pod", "period"] : ["period"]; } switch (dimension) { case "mono-vertex": - return "mvtx_name"; + return ["mvtx_name"]; default: - return dimension; + return [dimension]; } }, []); @@ -141,7 +142,18 @@ const LineChartComponent = ({ metricsReq?.metric_name ); chartData?.forEach((item) => { - const labelVal = item?.metric?.[label]; + let labelVal = ""; + label?.forEach((eachLabel: string) => { + if (item?.metric?.[eachLabel] !== undefined) { + labelVal += (labelVal ? "-" : "") + item.metric[eachLabel]; + } + }); + + // Remove initial hyphen if labelVal is not empty + if (labelVal.startsWith("-") && labelVal.length > 1) { + labelVal = labelVal.substring(1); + } + labels.push(labelVal); item?.values?.forEach(([timestamp, value]: [number, string]) => { const date = new Date(timestamp * 1000); diff --git a/ui/src/components/pages/Pipeline/partials/Graph/partials/NodeInfo/partials/Pods/partials/PodDetails/partials/Metrics/utils/constants.ts b/ui/src/components/pages/Pipeline/partials/Graph/partials/NodeInfo/partials/Pods/partials/PodDetails/partials/Metrics/utils/constants.ts index 56833633f..b953f217e 100644 --- a/ui/src/components/pages/Pipeline/partials/Graph/partials/NodeInfo/partials/Pods/partials/PodDetails/partials/Metrics/utils/constants.ts +++ b/ui/src/components/pages/Pipeline/partials/Graph/partials/NodeInfo/partials/Pods/partials/PodDetails/partials/Metrics/utils/constants.ts @@ -40,7 +40,7 @@ export const metricNameMap: { [p: string]: string } = { "Mono Vertex Sink Write Time Latency (in micro seconds)", forwarder_data_read_total: "Vertex Read Processing Rate (messages per second)", - monovtx_read_total: - "Mono Vertex Read Processing Rate (messages per second)", - monovtx_pending: "Mono Vertex Pending", + monovtx_read_total: "Mono Vertex Read Processing Rate (messages per second)", + monovtx_pending: "Mono Vertex Pending Messages", + vertex_pending_messages: "Vertex Pending Messages", };