From 43315bd4e75e5cc9c758729f9517cc25c2b90f12 Mon Sep 17 00:00:00 2001 From: Anush008 Date: Fri, 20 Dec 2024 12:23:58 +0530 Subject: [PATCH 1/4] feat: Support for Memory/Hardware metrics Signed-off-by: Anush008 --- qdrant/assets/dashboards/qdrant_overview.json | 357 ++++++++++++++++++ qdrant/datadog_checks/qdrant/metrics.py | 6 + qdrant/metadata.csv | 6 + qdrant/tests/common.py | 6 + qdrant/tests/data/metrics.txt | 67 +++- 5 files changed, 430 insertions(+), 12 deletions(-) diff --git a/qdrant/assets/dashboards/qdrant_overview.json b/qdrant/assets/dashboards/qdrant_overview.json index 8e929fbb47..436683f61c 100644 --- a/qdrant/assets/dashboards/qdrant_overview.json +++ b/qdrant/assets/dashboards/qdrant_overview.json @@ -1039,6 +1039,363 @@ "x": 0, "y": 20, "width": 12, + "height": 1 + } + }, + { + "id": 1813047088833060, + "definition": { + "title": "Memory/Hardware Statistics", + "background_color": "vivid_yellow", + "show_title": true, + "type": "group", + "layout_type": "ordered", + "widgets": [ + { + "id": 5441680164423334, + "definition": { + "type": "note", + "content": "These metrics help assess the memory and hardware usage of the running Qdrant instance.", + "background_color": "yellow", + "font_size": "14", + "text_align": "center", + "vertical_align": "center", + "show_tick": false, + "tick_pos": "50%", + "tick_edge": "left", + "has_padding": true + }, + "layout": { + "x": 0, + "y": 0, + "width": 12, + "height": 1 + } + }, + { + "id": 8032518802625521, + "definition": { + "title": "Total Allocated Memory", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "time": {}, + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": "byte" + } + }, + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": 
"avg:qdrant.memory.allocated.bytes{$host}" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 0, + "y": 1, + "width": 6, + "height": 2 + } + }, + { + "id": 7082919554448487, + "definition": { + "title": "Total Active Pages", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "time": {}, + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": "byte" + } + }, + "formula": "query1" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query1", + "query": "avg:qdrant.memory.active.bytes{$host}" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 6, + "y": 1, + "width": 6, + "height": 2 + } + }, + { + "id": 175987394328154, + "definition": { + "title": "Total Memory For Metadata", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "time": {}, + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": "byte" + } + }, + "formula": "query2" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query2", + "query": "avg:qdrant.memory.metadata.bytes{$host}" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 0, + "y": 3, + "width": 6, + "height": 2 + } + }, + { + "id": 3706444032444287, + "definition": { + "title": "Max 
Physically Resident Data Pages", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "time": {}, + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": "byte" + } + }, + "formula": "query2" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query2", + "query": "avg:qdrant.memory.resident.bytes{$host}" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 6, + "y": 3, + "width": 6, + "height": 2 + } + }, + { + "id": 8396142645919885, + "definition": { + "title": "Total Virtual Memory Mappings", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "time": {}, + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "number_format": { + "unit": { + "type": "canonical_unit", + "unit_name": "byte" + } + }, + "formula": "query2" + } + ], + "queries": [ + { + "data_source": "metrics", + "name": "query2", + "query": "avg:qdrant.memory.retained.bytes{$host}" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 0, + "y": 5, + "width": 6, + "height": 2 + } + }, + { + "id": 1903746319505022, + "definition": { + "title": "CPU Measurements", + "title_size": "16", + "title_align": "left", + "show_legend": true, + "legend_layout": "auto", + "legend_columns": [ + "avg", + "min", + "max", + "value", + "sum" + ], + "time": {}, + "type": "timeseries", + "requests": [ + { + "formulas": [ + { + "formula": "query2" + } + ], + "queries": [ + { + "data_source": 
"metrics", + "name": "query2", + "query": "avg:qdrant.collection.hardware.metric.cpu{$host}" + } + ], + "response_format": "timeseries", + "style": { + "palette": "dog_classic", + "line_type": "solid", + "line_width": "normal" + }, + "display_type": "line" + } + ] + }, + "layout": { + "x": 6, + "y": 5, + "width": 6, + "height": 2 + } + } + ] + }, + "layout": { + "x": 0, + "y": 21, + "width": 12, "height": 8 } } diff --git a/qdrant/datadog_checks/qdrant/metrics.py b/qdrant/datadog_checks/qdrant/metrics.py index edc388069e..2b8808e923 100644 --- a/qdrant/datadog_checks/qdrant/metrics.py +++ b/qdrant/datadog_checks/qdrant/metrics.py @@ -20,4 +20,10 @@ "cluster_commit": "cluster.commit", "cluster_pending_operations_total": "cluster.pending.operations.total", "cluster_voter": "cluster.voter", + "memory_active_bytes": "memory.active.bytes", + "memory_allocated_bytes": "memory.allocated.bytes", + "memory_metadata_bytes": "memory.metadata.bytes", + "memory_resident_bytes": "memory.resident.bytes", + "memory_retained_bytes": "memory.retained.bytes", + "collection_hardware_metric_cpu": "collection.hardware.metric.cpu" } diff --git a/qdrant/metadata.csv b/qdrant/metadata.csv index f2a40a08a1..093e4f7b75 100644 --- a/qdrant/metadata.csv +++ b/qdrant/metadata.csv @@ -20,3 +20,9 @@ qdrant.cluster.term.count,count,,,,Current cluster term,0,qdrant,qdrant cluster qdrant.cluster.commit.count,count,,,,Index of last committed operation cluster peer is aware of,0,qdrant,qdrant cluster commit, qdrant.cluster.pending.operations.total,gauge,,,, Total number of pending operations for cluster peer,0,qdrant,qdrant cluster pending operations, qdrant.cluster.voter,gauge,,,,Whether the cluster peer is a voter or learner,0,qdrant,qdrant cluster voter, +qdrant.memory.active.bytes,gauge,,,,Total number of bytes in active pages allocated by the application,0,qdrant,qdrant memory active bytes, +qdrant.memory.allocated.bytes,gauge,,,,Total number of bytes allocated by the application,0,qdrant,qdrant 
memory allocated bytes, +qdrant.memory.metadata.bytes,gauge,,,,Total number of bytes dedicated to metadata,0,qdrant,qdrant memory metadata bytes, +qdrant.memory.resident.bytes,gauge,,,,Maximum number of bytes in physically resident data pages mapped,0,qdrant,qdrant memory resident bytes, +qdrant.memory.retained.bytes,gauge,,,,Total number of bytes in virtual memory mappings,0,qdrant,qdrant memory retained bytes, +qdrant.collection.hardware.metric.cpu,gauge,,,,CPU measurements of a collection,0,qdrant,qdrant collection hardware metric cpu, \ No newline at end of file diff --git a/qdrant/tests/common.py b/qdrant/tests/common.py index 873bd3685f..c1e2f04661 100644 --- a/qdrant/tests/common.py +++ b/qdrant/tests/common.py @@ -26,4 +26,10 @@ def read_file(file_path, is_json=False): "qdrant.grpc.responses.fail.count", "qdrant.rest.responses.count", "qdrant.rest.responses.fail.count", + "qdrant.collection.hardware.metric.cpu", + "qdrant.memory.active.bytes", + "qdrant.memory.metadata.bytes", + "qdrant.memory.resident.bytes", + "qdrant.memory.retained.bytes", + "qdrant.memory.allocated.bytes", ] diff --git a/qdrant/tests/data/metrics.txt b/qdrant/tests/data/metrics.txt index 3e16098461..cdc745c74a 100644 --- a/qdrant/tests/data/metrics.txt +++ b/qdrant/tests/data/metrics.txt @@ -3,10 +3,10 @@ app_status_recovery_mode 0 # HELP collections_total number of collections # TYPE collections_total gauge -collections_total 1 +collections_total 6 # HELP collections_vector_total total number of vectors in all collections # TYPE collections_vector_total gauge -collections_vector_total 5417 +collections_vector_total 31732 # HELP cluster_enabled is cluster support enabled # TYPE cluster_enabled gauge cluster_enabled 1 @@ -15,10 +15,10 @@ cluster_enabled 1 cluster_peers_total 1 # HELP cluster_term current cluster term # TYPE cluster_term counter -cluster_term 15 +cluster_term 26 # HELP cluster_commit index of last committed (finalized) operation cluster peer is aware of # TYPE 
cluster_commit counter -cluster_commit{peer_id="7921884851411016"} 2285 +cluster_commit{peer_id="7921884851411016"} 2425 # HELP cluster_pending_operations_total total number of pending operations for cluster peer # TYPE cluster_pending_operations_total gauge cluster_pending_operations_total 0 @@ -91,23 +91,66 @@ rest_responses_duration_seconds_sum{method="POST",endpoint="/collections/{name}/ rest_responses_duration_seconds_count{method="POST",endpoint="/collections/{name}/points/count",status="200"} 15 # HELP grpc_responses_total total number of responses # TYPE grpc_responses_total counter -grpc_responses_total{endpoint="/qdrant.Points/Query"} 60 +grpc_responses_total{endpoint="/qdrant.Points/Upsert"} 6 +grpc_responses_total{endpoint="/qdrant.Points/Query"} 1 +grpc_responses_total{endpoint="/qdrant.Points/Scroll"} 6 # HELP grpc_responses_fail_total total number of failed responses # TYPE grpc_responses_fail_total counter +grpc_responses_fail_total{endpoint="/qdrant.Points/Upsert"} 0 grpc_responses_fail_total{endpoint="/qdrant.Points/Query"} 0 +grpc_responses_fail_total{endpoint="/qdrant.Points/Scroll"} 0 # HELP grpc_responses_avg_duration_seconds average response duration # TYPE grpc_responses_avg_duration_seconds gauge -grpc_responses_avg_duration_seconds{endpoint="/qdrant.Points/Query"} 0.00151775830078125 +grpc_responses_avg_duration_seconds{endpoint="/qdrant.Points/Upsert"} 0.0096579775390625 +grpc_responses_avg_duration_seconds{endpoint="/qdrant.Points/Query"} 0.001953 +grpc_responses_avg_duration_seconds{endpoint="/qdrant.Points/Scroll"} 0.0006911055297851563 # HELP grpc_responses_min_duration_seconds minimum response duration # TYPE grpc_responses_min_duration_seconds gauge -grpc_responses_min_duration_seconds{endpoint="/qdrant.Points/Query"} 0.001059 +grpc_responses_min_duration_seconds{endpoint="/qdrant.Points/Upsert"} 0.000996 +grpc_responses_min_duration_seconds{endpoint="/qdrant.Points/Query"} 0.001953 
+grpc_responses_min_duration_seconds{endpoint="/qdrant.Points/Scroll"} 0.000309 # HELP grpc_responses_max_duration_seconds maximum response duration # TYPE grpc_responses_max_duration_seconds gauge -grpc_responses_max_duration_seconds{endpoint="/qdrant.Points/Query"} 0.004388 +grpc_responses_max_duration_seconds{endpoint="/qdrant.Points/Upsert"} 0.01462 +grpc_responses_max_duration_seconds{endpoint="/qdrant.Points/Query"} 0.001953 +grpc_responses_max_duration_seconds{endpoint="/qdrant.Points/Scroll"} 0.002253 # HELP grpc_responses_duration_seconds response duration histogram # TYPE grpc_responses_duration_seconds histogram +grpc_responses_duration_seconds_bucket{endpoint="/qdrant.Points/Upsert",le="0.0005"} 0 +grpc_responses_duration_seconds_bucket{endpoint="/qdrant.Points/Upsert",le="0.001"} 1 +grpc_responses_duration_seconds_bucket{endpoint="/qdrant.Points/Upsert",le="0.005"} 4 +grpc_responses_duration_seconds_bucket{endpoint="/qdrant.Points/Upsert",le="0.01"} 4 +grpc_responses_duration_seconds_bucket{endpoint="/qdrant.Points/Upsert",le="0.05"} 6 +grpc_responses_duration_seconds_bucket{endpoint="/qdrant.Points/Upsert",le="+Inf"} 6 +grpc_responses_duration_seconds_sum{endpoint="/qdrant.Points/Upsert"} 0.0332 +grpc_responses_duration_seconds_count{endpoint="/qdrant.Points/Upsert"} 6 grpc_responses_duration_seconds_bucket{endpoint="/qdrant.Points/Query",le="0.001"} 0 -grpc_responses_duration_seconds_bucket{endpoint="/qdrant.Points/Query",le="0.005"} 60 -grpc_responses_duration_seconds_bucket{endpoint="/qdrant.Points/Query",le="+Inf"} 60 -grpc_responses_duration_seconds_sum{endpoint="/qdrant.Points/Query"} 0.087005 -grpc_responses_duration_seconds_count{endpoint="/qdrant.Points/Query"} 60 \ No newline at end of file +grpc_responses_duration_seconds_bucket{endpoint="/qdrant.Points/Query",le="0.005"} 1 +grpc_responses_duration_seconds_bucket{endpoint="/qdrant.Points/Query",le="+Inf"} 1 +grpc_responses_duration_seconds_sum{endpoint="/qdrant.Points/Query"} 0.001953 
+grpc_responses_duration_seconds_count{endpoint="/qdrant.Points/Query"} 1 +grpc_responses_duration_seconds_bucket{endpoint="/qdrant.Points/Scroll",le="0.0001"} 0 +grpc_responses_duration_seconds_bucket{endpoint="/qdrant.Points/Scroll",le="0.0005"} 2 +grpc_responses_duration_seconds_bucket{endpoint="/qdrant.Points/Scroll",le="0.001"} 5 +grpc_responses_duration_seconds_bucket{endpoint="/qdrant.Points/Scroll",le="0.005"} 6 +grpc_responses_duration_seconds_bucket{endpoint="/qdrant.Points/Scroll",le="+Inf"} 6 +grpc_responses_duration_seconds_sum{endpoint="/qdrant.Points/Scroll"} 0.004966 +grpc_responses_duration_seconds_count{endpoint="/qdrant.Points/Scroll"} 6 +# HELP collection_hardware_metric_cpu CPU measurements of a collection +# TYPE collection_hardware_metric_cpu gauge +collection_hardware_metric_cpu{id="rag2"} 159744 +# HELP memory_active_bytes Total number of bytes in active pages allocated by the application +# TYPE memory_active_bytes gauge +memory_active_bytes 211828736 +# HELP memory_allocated_bytes Total number of bytes allocated by the application +# TYPE memory_allocated_bytes gauge +memory_allocated_bytes 205169536 +# HELP memory_metadata_bytes Total number of bytes dedicated to metadata +# TYPE memory_metadata_bytes gauge +memory_metadata_bytes 7406976 +# HELP memory_resident_bytes Maximum number of bytes in physically resident data pages mapped +# TYPE memory_resident_bytes gauge +memory_resident_bytes 219181056 +# HELP memory_retained_bytes Total number of bytes in virtual memory mappings +# TYPE memory_retained_bytes gauge +memory_retained_bytes 146206720 \ No newline at end of file From 6034fe5b62f380f1d344717d167334c70284b8be Mon Sep 17 00:00:00 2001 From: Anush008 Date: Fri, 20 Dec 2024 12:26:23 +0530 Subject: [PATCH 2/4] chore: Updated CHANGELOG.md Signed-off-by: Anush008 --- qdrant/CHANGELOG.md | 7 ++++++- qdrant/README.md | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/qdrant/CHANGELOG.md b/qdrant/CHANGELOG.md index 
d4d527a9ba..929b19a8d9 100644 --- a/qdrant/CHANGELOG.md +++ b/qdrant/CHANGELOG.md @@ -1,8 +1,13 @@ # CHANGELOG - Qdrant +## 1.1.0 / 2024-12-20 + +***Added***: + +* Support for new Hardware/Memory metrics. + ## 1.0.0 / 2024-08-10 ***Added***: * Initial Release - diff --git a/qdrant/README.md b/qdrant/README.md index af080478a7..50e9bb6c32 100644 --- a/qdrant/README.md +++ b/qdrant/README.md @@ -20,7 +20,7 @@ For Agent v7.21+ / v6.21+, follow the instructions below to install the Qdrant c 1. Run the following command to install the Agent integration: ```shell - datadog-agent integration install -t qdrant==1.0.0 + datadog-agent integration install -t qdrant==1.1.0 ``` 2. Configure your integration similar to core [integrations][4]. From 168413c7ef00712d199ba8a35b40075db1e39fd0 Mon Sep 17 00:00:00 2001 From: Anush008 Date: Fri, 20 Dec 2024 12:34:09 +0530 Subject: [PATCH 3/4] chore: formatting Signed-off-by: Anush008 --- qdrant/datadog_checks/qdrant/metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qdrant/datadog_checks/qdrant/metrics.py b/qdrant/datadog_checks/qdrant/metrics.py index 2b8808e923..4841593593 100644 --- a/qdrant/datadog_checks/qdrant/metrics.py +++ b/qdrant/datadog_checks/qdrant/metrics.py @@ -25,5 +25,5 @@ "memory_metadata_bytes": "memory.metadata.bytes", "memory_resident_bytes": "memory.resident.bytes", "memory_retained_bytes": "memory.retained.bytes", - "collection_hardware_metric_cpu": "collection.hardware.metric.cpu" + "collection_hardware_metric_cpu": "collection.hardware.metric.cpu", } From e7af8c5d98aa44907de7630e775007d72bd7c1bd Mon Sep 17 00:00:00 2001 From: Anush Date: Mon, 6 Jan 2025 20:47:02 +0530 Subject: [PATCH 4/4] Update README.md --- qdrant/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qdrant/README.md b/qdrant/README.md index 50e9bb6c32..bbde507f2d 100644 --- a/qdrant/README.md +++ b/qdrant/README.md @@ -27,7 +27,7 @@ For Agent v7.21+ / v6.21+, follow the instructions below to 
install the Qdrant c ### Configuration -1. Edit the `qdrant.d/conf.yaml` file in the `conf.d/` folder at the root of your [Agent's configuration directory][7] to start collecting your Qdrant [metrics](#metrics). +1. Edit the `conf.d/qdrant.d/conf.yaml` file in your [Agent's configuration directory][7] to start collecting your Qdrant [metrics](#metrics). Most importantly, set the `openmetrics_endpoint` value to the [`/metrics` endpoint](https://qdrant.tech/documentation/guides/monitoring/#monitoring) of your Qdrant instance.