Skip to content

Commit

Permalink
[AI-4453] Add Milvus recommended monitors (#19337)
Browse files Browse the repository at this point in the history
* Add milvus recommended monitors

* Update milvus/assets/monitors/dml_channel_lag.json

Co-authored-by: Heston Hoffman <[email protected]>

* Switch monitors from sum to average

---------

Co-authored-by: Heston Hoffman <[email protected]>
  • Loading branch information
dkirov-dd and hestonhoffman authored Jan 24, 2025
1 parent 3660d76 commit d3b9825
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 0 deletions.
23 changes: 23 additions & 0 deletions milvus/assets/monitors/dml_channel_lag.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
  "version": 2,
  "created_at": "2025-01-03",
  "last_updated_at": "2025-01-03",
  "title": "DML channel lag",
  "description": "This monitor measures the lag in the DML channels. High lag indicates synchronization issues, which can affect real-time data consistency and query freshness.",
  "definition": {
    "name": "Significant lag in DML channel",
    "type": "query alert",
    "query": "avg(last_5m):avg:milvus.datacoord.consume_datanode_tt_lag_ms{*} > 5000",
    "message": "{{#is_alert}}DML channel lag exceeds {{threshold}} milliseconds.{{/is_alert}}\n{{#is_recovery}}DML channel lag has returned to normal levels.{{/is_recovery}}",
    "tags": ["integration:milvus"],
    "options": {
      "thresholds": {
        "critical": 5000
      },
      "notify_audit": false,
      "on_missing_data": "default",
      "include_tags": false
    }
  },
  "tags": ["integration:milvus"]
}
24 changes: 24 additions & 0 deletions milvus/assets/monitors/index_build_latency.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
  "version": 2,
  "created_at": "2025-01-03",
  "last_updated_at": "2025-01-03",
  "title": "Index build latency",
  "description": "This monitor measures the time taken for index builds to complete. Index build latency directly affects the availability of new data for search queries.",
  "definition": {
    "name": "High index build latency",
    "type": "query alert",
    "query": "sum(last_5m):avg:milvus.build_latency.sum{*}.as_count() > 2",
    "message": "{{#is_alert}}Index build latency has exceeded {{threshold}}.{{/is_alert}}\n{{#is_recovery}}Index build latency has returned to normal levels.{{/is_recovery}}",
    "tags": ["integration:milvus"],
    "options": {
      "thresholds": {
        "critical": 2,
        "warning": 1
      },
      "notify_audit": false,
      "on_missing_data": "default",
      "include_tags": false
    }
  },
  "tags": ["integration:milvus"]
}
26 changes: 26 additions & 0 deletions milvus/assets/monitors/request_latency.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
  "version": 2,
  "created_at": "2025-01-03",
  "last_updated_at": "2025-01-03",
  "title": "Request latency",
  "description": "This monitor measures the time taken for requests to complete. High latency can indicate performance bottlenecks, such as inefficient code, resource contention, or hardware limitations. Monitoring request latency helps identify these issues.",
  "definition": {
    "name": "Request latency is high",
    "type": "query alert",
    "query": "sum(last_5m):avg:milvus.proxy.req.latency.sum{*}.as_count() > 500",
    "message": "{{#is_alert}}Request latency exceeds {{threshold}} milliseconds.{{/is_alert}}\n{{#is_recovery}}Request latency has returned to acceptable levels.{{/is_recovery}}",
    "tags": [
      "integration:milvus"
    ],
    "options": {
      "thresholds": {
        "critical": 500,
        "warning": 400
      },
      "notify_audit": false,
      "on_missing_data": "default",
      "include_tags": false,
      "avalanche_window": 10
    },
    "priority": null
  },
  "tags": [
    "integration:milvus"
  ]
}
5 changes: 5 additions & 0 deletions milvus/manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@
},
"dashboards": {
"Milvus Overview": "assets/dashboards/milvus_overview.json"
},
"monitors": {
"DML channel lag": "assets/monitors/dml_channel_lag.json",
"Request latency": "assets/monitors/request_latency.json",
"Index build latency": "assets/monitors/index_build_latency.json"
}
},
"author": {
Expand Down

0 comments on commit d3b9825

Please sign in to comment.