Skip to content

Commit

Permalink
updating panels to reflect metric name change
Browse files: browse the repository at this point in the history
slurmjob_info -> rmsjob_info

Signed-off-by: Karl W Schulz <[email protected]>
  • Loading branch information
koomie committed Jul 15, 2024
1 parent c6585e2 commit 0ed33b8
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 24 deletions.
2 changes: 1 addition & 1 deletion docker/grafana/json-models/index.json
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@
"uid" : "prometheus"
},
"editorMode" : "code",
"expr" : "timestamp(group by (jobid,user,batchflag,partition,nodes) (slurmjob_info{jobid=~\"^\\\\d+\"}))",
"expr" : "timestamp(group by (jobid,user,batchflag,partition,nodes) (rmsjob_info{jobid=~\"^\\\\d+\"}))",
"format" : "table",
"hide" : false,
"instant" : false,
Expand Down
46 changes: 23 additions & 23 deletions docker/grafana/json-models/job.json
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@
},
"editorMode" : "code",
"exemplar" : false,
"expr" : "(slurmjob_info{jobid=\"$jobid\"})",
"expr" : "(rmsjob_info{jobid=\"$jobid\"})",
"instant" : false,
"legendFormat" : "{{batchflag}}",
"range" : true,
Expand Down Expand Up @@ -270,7 +270,7 @@
"uid" : "$source"
},
"editorMode" : "code",
"expr" : "slurmjob_info{jobid=\"$jobid\"}",
"expr" : "rmsjob_info{jobid=\"$jobid\"}",
"instant" : false,
"legendFormat" : "__auto",
"range" : true,
Expand Down Expand Up @@ -362,7 +362,7 @@
"uid" : "$source"
},
"editorMode" : "code",
"expr" : "avg(avg(label_replace({__name__=~\"card(.*)_rocm_utilization\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_utilization\") * on (instance) group_left() slurmjob_info{jobid=\"$jobid\"}) by (card))",
"expr" : "avg(avg(label_replace({__name__=~\"card(.*)_rocm_utilization\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_utilization\") * on (instance) group_left() rmsjob_info{jobid=\"$jobid\"}) by (card))",
"instant" : false,
"legendFormat" : "GPU Core",
"range" : true,
Expand All @@ -374,7 +374,7 @@
"uid" : "$source"
},
"editorMode" : "code",
"expr" : "avg(100 * avg(label_replace({__name__=~\"card(.*)_rocm_vram_used\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_vram_used\") * on (instance) group_left() slurmjob_info{jobid=\"$jobid\"}) by (card) / avg(label_replace({__name__=~\"card(.*)_rocm_vram_total\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_vram_total\") * on (instance) group_left() slurmjob_info{jobid=\"$jobid\"}) by (card))",
"expr" : "avg(100 * avg(label_replace({__name__=~\"card(.*)_rocm_vram_used\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_vram_used\") * on (instance) group_left() rmsjob_info{jobid=\"$jobid\"}) by (card) / avg(label_replace({__name__=~\"card(.*)_rocm_vram_total\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_vram_total\") * on (instance) group_left() rmsjob_info{jobid=\"$jobid\"}) by (card))",
"hide" : false,
"instant" : false,
"legendFormat" : "GPU Memory",
Expand Down Expand Up @@ -445,7 +445,7 @@
"disableTextWrap" : false,
"editorMode" : "builder",
"exemplar" : false,
"expr" : "slurmjob_info{jobid=\"$jobid\"}",
"expr" : "rmsjob_info{jobid=\"$jobid\"}",
"fullMetaSearch" : false,
"includeNullMetadata" : true,
"instant" : false,
Expand Down Expand Up @@ -534,7 +534,7 @@
},
"editorMode" : "code",
"exemplar" : false,
"expr" : "slurmjob_info{jobid=\"$jobid\"}",
"expr" : "rmsjob_info{jobid=\"$jobid\"}",
"instant" : false,
"legendFormat" : "__auto",
"range" : true,
Expand Down Expand Up @@ -620,7 +620,7 @@
"uid" : "$source"
},
"editorMode" : "code",
"expr" : "max(timestamp(slurmjob_info{jobid=\"$jobid\"}))",
"expr" : "max(timestamp(rmsjob_info{jobid=\"$jobid\"}))",
"hide" : false,
"instant" : false,
"interval" : "",
Expand Down Expand Up @@ -711,7 +711,7 @@
},
"editorMode" : "code",
"exemplar" : false,
"expr" : "avg(label_replace({__name__=~\"card(.*)_rocm_utilization\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_utilization\") * on (instance) group_left() slurmjob_info{jobid=\"$jobid\"}) by (card)",
"expr" : "avg(label_replace({__name__=~\"card(.*)_rocm_utilization\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_utilization\") * on (instance) group_left() rmsjob_info{jobid=\"$jobid\"}) by (card)",
"instant" : false,
"legendFormat" : "Card: {{card}}",
"range" : true,
Expand Down Expand Up @@ -810,7 +810,7 @@
},
"editorMode" : "code",
"exemplar" : false,
"expr" : "avg(label_replace({__name__=~\"card(.*)_rocm_utilization\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_utilization\") * on (instance) group_left() slurmjob_info{jobid=\"$jobid\"}) by (card)",
"expr" : "avg(label_replace({__name__=~\"card(.*)_rocm_utilization\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_utilization\") * on (instance) group_left() rmsjob_info{jobid=\"$jobid\"}) by (card)",
"format" : "time_series",
"instant" : false,
"interval" : "",
Expand Down Expand Up @@ -888,7 +888,7 @@
},
"editorMode" : "code",
"exemplar" : false,
"expr" : "100 * max(label_replace({__name__=~\"card(.*)_rocm_vram_used\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_vram_used\") * on (instance) group_left() slurmjob_info{jobid=\"$jobid\"}) by (card) / max(label_replace({__name__=~\"card(.*)_rocm_vram_total\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_vram_total\") * on (instance) group_left() slurmjob_info{jobid=\"$jobid\"}) by (card)",
"expr" : "100 * max(label_replace({__name__=~\"card(.*)_rocm_vram_used\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_vram_used\") * on (instance) group_left() rmsjob_info{jobid=\"$jobid\"}) by (card) / max(label_replace({__name__=~\"card(.*)_rocm_vram_total\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_vram_total\") * on (instance) group_left() rmsjob_info{jobid=\"$jobid\"}) by (card)",
"instant" : false,
"legendFormat" : "Card: {{card}}",
"range" : true,
Expand Down Expand Up @@ -987,7 +987,7 @@
},
"editorMode" : "code",
"exemplar" : false,
"expr" : "100 * avg(label_replace({__name__=~\"card(.*)_rocm_vram_used\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_vram_used\") * on (instance) group_left() slurmjob_info{jobid=\"$jobid\"}) by (card) / max(label_replace({__name__=~\"card(.*)_rocm_vram_total\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_vram_total\") * on (instance) group_left() slurmjob_info{jobid=\"$jobid\"}) by (card)",
"expr" : "100 * avg(label_replace({__name__=~\"card(.*)_rocm_vram_used\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_vram_used\") * on (instance) group_left() rmsjob_info{jobid=\"$jobid\"}) by (card) / max(label_replace({__name__=~\"card(.*)_rocm_vram_total\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_vram_total\") * on (instance) group_left() rmsjob_info{jobid=\"$jobid\"}) by (card)",
"format" : "time_series",
"instant" : false,
"interval" : "",
Expand Down Expand Up @@ -1100,7 +1100,7 @@
"uid" : "$source"
},
"editorMode" : "code",
"expr" : "avg(label_replace({__name__=~\"card(.*)_rocm_utilization\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_utilization\") * on (instance) group_left() slurmjob_info{jobid=\"$jobid\"}) by (card)",
"expr" : "avg(label_replace({__name__=~\"card(.*)_rocm_utilization\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_utilization\") * on (instance) group_left() rmsjob_info{jobid=\"$jobid\"}) by (card)",
"instant" : false,
"legendFormat" : "Card: {{card}}",
"range" : true,
Expand Down Expand Up @@ -1198,7 +1198,7 @@
"uid" : "$source"
},
"editorMode" : "code",
"expr" : "avg(label_replace({__name__=~\"card(.*)_rocm_temp_die_edge\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_temp_die_edge\") * on (instance) group_left() slurmjob_info{jobid=\"$jobid\"}) by (card)",
"expr" : "avg(label_replace({__name__=~\"card(.*)_rocm_temp_die_edge\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_temp_die_edge\") * on (instance) group_left() rmsjob_info{jobid=\"$jobid\"}) by (card)",
"instant" : false,
"legendFormat" : "Card: {{card}}",
"range" : true,
Expand Down Expand Up @@ -1296,7 +1296,7 @@
"uid" : "$source"
},
"editorMode" : "code",
"expr" : "avg(label_replace({__name__=~\"card(.*)_rocm_avg_pwr\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_avg_pwr\") * on (instance) group_left() slurmjob_info{jobid=\"$jobid\"}) by (card)",
"expr" : "avg(label_replace({__name__=~\"card(.*)_rocm_avg_pwr\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_avg_pwr\") * on (instance) group_left() rmsjob_info{jobid=\"$jobid\"}) by (card)",
"instant" : false,
"legendFormat" : "Card: {{card}}",
"range" : true,
Expand Down Expand Up @@ -1394,7 +1394,7 @@
"uid" : "$source"
},
"editorMode" : "code",
"expr" : "avg(label_replace({__name__=~\"card(.*)_rocm_sclk_clock_mhz\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_sclk_clock_mhz\") * on (instance) group_left() slurmjob_info{jobid=\"$jobid\"}) by (card)",
"expr" : "avg(label_replace({__name__=~\"card(.*)_rocm_sclk_clock_mhz\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_sclk_clock_mhz\") * on (instance) group_left() rmsjob_info{jobid=\"$jobid\"}) by (card)",
"instant" : false,
"legendFormat" : "Card: {{card}}",
"range" : true,
Expand Down Expand Up @@ -1492,7 +1492,7 @@
"uid" : "$source"
},
"editorMode" : "code",
"expr" : "avg(label_replace({__name__=~\"card(.*)_rocm_mclk_clock_mhz\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_mclk_clock_mhz\") * on (instance) group_left() slurmjob_info{jobid=\"$jobid\"}) by (card)",
"expr" : "avg(label_replace({__name__=~\"card(.*)_rocm_mclk_clock_mhz\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_mclk_clock_mhz\") * on (instance) group_left() rmsjob_info{jobid=\"$jobid\"}) by (card)",
"instant" : false,
"legendFormat" : "Card: {{card}}",
"range" : true,
Expand Down Expand Up @@ -1711,7 +1711,7 @@
"uid" : "$source"
},
"editorMode" : "code",
"expr" : "quantile(0.20, (label_replace({__name__=~\"card(.*)_rocm_utilization\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_utilization\") * on (instance) group_left() slurmjob_info{jobid=\"$jobid\"})) ",
"expr" : "quantile(0.20, (label_replace({__name__=~\"card(.*)_rocm_utilization\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_utilization\") * on (instance) group_left() rmsjob_info{jobid=\"$jobid\"})) ",
"instant" : false,
"legendFormat" : "Quantile 0.2 ",
"range" : true,
Expand All @@ -1723,7 +1723,7 @@
"uid" : "$source"
},
"editorMode" : "code",
"expr" : "quantile(0.5, (label_replace({__name__=~\"card(.*)_rocm_utilization\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_utilization\") * on (instance) group_left() slurmjob_info{jobid=\"$jobid\"})) ",
"expr" : "quantile(0.5, (label_replace({__name__=~\"card(.*)_rocm_utilization\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_utilization\") * on (instance) group_left() rmsjob_info{jobid=\"$jobid\"})) ",
"hide" : false,
"instant" : false,
"legendFormat" : "Median",
Expand All @@ -1736,7 +1736,7 @@
"uid" : "$source"
},
"editorMode" : "code",
"expr" : "quantile(0.8, (label_replace({__name__=~\"card(.*)_rocm_utilization\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_utilization\") * on (instance) group_left() slurmjob_info{jobid=\"$jobid\"})) ",
"expr" : "quantile(0.8, (label_replace({__name__=~\"card(.*)_rocm_utilization\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_utilization\") * on (instance) group_left() rmsjob_info{jobid=\"$jobid\"})) ",
"hide" : false,
"instant" : false,
"legendFormat" : "Quantile 0.8",
Expand All @@ -1749,7 +1749,7 @@
"uid" : "$source"
},
"editorMode" : "code",
"expr" : "avg((label_replace({__name__=~\"card(.*)_rocm_utilization\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_utilization\") * on (instance) group_left() slurmjob_info{jobid=\"$jobid\"}))",
"expr" : "avg((label_replace({__name__=~\"card(.*)_rocm_utilization\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_utilization\") * on (instance) group_left() rmsjob_info{jobid=\"$jobid\"}))",
"hide" : false,
"instant" : false,
"legendFormat" : "Average",
Expand Down Expand Up @@ -1862,7 +1862,7 @@
"uid" : "$source"
},
"editorMode" : "code",
"expr" : "avg(label_replace({__name__=~\"card(.*)_rocm_utilization\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_utilization\") * on (instance) group_left() slurmjob_info{jobid=\"$jobid\"}) by (instance)",
"expr" : "avg(label_replace({__name__=~\"card(.*)_rocm_utilization\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_utilization\") * on (instance) group_left() rmsjob_info{jobid=\"$jobid\"}) by (instance)",
"instant" : false,
"legendFormat" : "__auto",
"range" : true,
Expand Down Expand Up @@ -2017,7 +2017,7 @@
"uid" : "$source"
},
"editorMode" : "code",
"expr" : "label_replace({__name__=~\"card(.*)_rocm_utilization\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_utilization\") * on (instance) group_left() slurmjob_info{jobid=\"$jobid\"}",
"expr" : "label_replace({__name__=~\"card(.*)_rocm_utilization\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_utilization\") * on (instance) group_left() rmsjob_info{jobid=\"$jobid\"}",
"instant" : false,
"legendFormat" : "__auto",
"range" : true,
Expand Down Expand Up @@ -2225,7 +2225,7 @@
"uid" : "$source"
},
"editorMode" : "code",
"expr": "avg by (marker) ((label_replace({__name__=~\"card(.*)_rocm_utilization\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_utilization\")) * on (instance) group_left(jobid,marker) slurmjob_info{jobid=\"$jobid\"} * on (jobid) group_left(marker) count by (jobid,marker) (slurmjob_annotations{jobid=\"$jobid\"} > 0))",
"expr": "avg by (marker) ((label_replace({__name__=~\"card(.*)_rocm_utilization\"}, \"card\", \"$1\", \"__name__\", \"card(.*)_rocm_utilization\")) * on (instance) group_left(jobid,marker) rmsjob_info{jobid=\"$jobid\"} * on (jobid) group_left(marker) count by (jobid,marker) (slurmjob_annotations{jobid=\"$jobid\"} > 0))",
"hide" : false,
"instant" : false,
"legendFormat" : "__auto",
Expand Down

0 comments on commit 0ed33b8

Please sign in to comment.