Skip to content

Commit

Permalink
install Prometheus as subchart
Browse files Browse the repository at this point in the history
  • Loading branch information
kondratyevd committed Feb 11, 2025
1 parent 1b33d5a commit e984033
Show file tree
Hide file tree
Showing 9 changed files with 121 additions and 135 deletions.
6 changes: 5 additions & 1 deletion helm/supersonic/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ annotations:
dependencies:
- name: grafana
version: "8.9.0" # Latest stable version as of now
version: "8.9.0"
repository: https://grafana.github.io/helm-charts
condition: grafana-legacy.enabled
- name: prometheus
version: "27.3.0"
repository: https://prometheus-community.github.io/helm-charts
condition: prometheus.enabled
16 changes: 12 additions & 4 deletions helm/supersonic/templates/_prometheus.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -43,15 +43,23 @@ Get existing Prometheus service name (from any release)
Get Prometheus URL (handles external, ingress, existing, and new instances)
*/}}
{{- define "supersonic.prometheusUrl" -}}
{{- if (not .Values.prometheus.external) -}}
{{- if .Values.prometheus.external -}}
{{- if .Values.prometheus.url -}}
{{ .Values.prometheus.scheme }}://{{ .Values.prometheus.url }}
{{- end -}}
{{- else if .Values.prometheus.enabled -}}
{{- if .Values.prometheus.ingress.enabled -}}
https://{{ .Values.prometheus.ingress.hostName }}
{{- else -}}
http://{{ include "supersonic.prometheusName" . }}.{{ .Release.Namespace }}.svc.cluster.local:{{ .Values.prometheus.server.service.servicePort }}
{{- end -}}
{{- else -}}
{{- if .Values.prometheus.ingress.enabled -}}
https://{{ .Values.prometheus.ingress.hostName }}
{{- else -}}
http://{{ include "supersonic.prometheusName" . }}.{{ .Release.Namespace }}.svc.cluster.local:9090
{{- end -}}
{{- else if .Values.prometheus.url -}}
{{ .Values.prometheus.scheme }}://{{ .Values.prometheus.url }}
{{- end }}
{{- end -}}
{{- end }}

{{/*
Expand Down
28 changes: 28 additions & 0 deletions helm/supersonic/templates/grafana-datasources.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{{- /*
Grafana datasource provisioning ConfigMap.

Rendered only when .Values.grafana.enabled is set. The ConfigMap carries the
label grafana_datasource: "1" (presumably the label a Grafana datasource
sidecar watches for -- TODO confirm against the Grafana subchart's sidecar
settings).

The Prometheus URL is selected in this order:
  1. prometheus.external -> <prometheus.scheme>://<prometheus.url> from values.
  2. prometheus.enabled  -> the "<release>-prometheus-server" Service in the
     release namespace, on prometheus.server.service.servicePort (falls back
     to 9090 when unset). This matches the port used by the
     "supersonic.prometheusUrl" helper, so changing servicePort does not leave
     Grafana pointing at a stale port.
  3. otherwise           -> the Service named by "supersonic.prometheusName"
     in the release namespace, on port 9090.
*/ -}}
{{- if .Values.grafana.enabled }}
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ .Release.Name }}-grafana-datasources
  labels:
    grafana_datasource: "1"
data:
  datasources.yaml: |
    apiVersion: 1
    datasources:
      - name: prometheus
        type: prometheus
        access: proxy
        isDefault: true
        {{- if .Values.prometheus.external }}
        url: "{{ .Values.prometheus.scheme }}://{{ .Values.prometheus.url }}"
        {{- else if .Values.prometheus.enabled }}
        url: "http://{{ .Release.Name }}-prometheus-server.{{ .Release.Namespace }}.svc.cluster.local:{{ .Values.prometheus.server.service.servicePort | default 9090 }}"
        {{- else }}
        url: "http://{{ include "supersonic.prometheusName" . }}.{{ .Release.Namespace }}.svc.cluster.local:9090"
        {{- end }}
        jsonData:
          timeInterval: "5s"
          {{- /*
          NOTE(review): certificate verification is switched off for every
          external HTTPS Prometheus. Kept as-is for backward compatibility;
          consider exposing this as a value instead of hard-coding it.
          */ -}}
          {{- if and .Values.prometheus.external (eq .Values.prometheus.scheme "https") }}
          tlsSkipVerify: true
          {{- end }}
{{- end }}
6 changes: 3 additions & 3 deletions helm/supersonic/templates/prometheus-configmap.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{{- if not .Values.prometheus.external }}
{{- if and .Values.prometheus.enabled (not .Values.prometheus.external) }}
apiVersion: v1
kind: ConfigMap
metadata:
Expand All @@ -10,8 +10,8 @@ metadata:
data:
prometheus.yml: |
global:
scrape_interval: 5s
evaluation_interval: 5s
scrape_interval: {{ .Values.prometheus.server.global.scrape_interval }}
evaluation_interval: {{ .Values.prometheus.server.global.evaluation_interval }}
scrape_configs:
- job_name: "{{ include "supersonic.tritonName" . }}"
Expand Down
56 changes: 0 additions & 56 deletions helm/supersonic/templates/prometheus-deployment.yaml

This file was deleted.

31 changes: 0 additions & 31 deletions helm/supersonic/templates/prometheus-ingress.yaml

This file was deleted.

21 changes: 0 additions & 21 deletions helm/supersonic/templates/prometheus-service.yaml

This file was deleted.

57 changes: 57 additions & 0 deletions helm/supersonic/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,9 @@ prometheus:
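# -- Whether to use an external Prometheus instance (true) or an in-cluster one (false).
# Setting external=false does not deploy Prometheus by itself; deployment is controlled by `enabled`.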
external: true

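# -- Deploy a Prometheus server via the bundled Prometheus subchart.
# When external=true, the external URL still takes precedence and the internal scrape config is not rendered.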
enabled: false # Set to true to deploy Prometheus via subchart

# -- External Prometheus server url and port number (find in documentation of a given cluster or ask admins)
# Only used when external=true
url: ""
Expand All @@ -193,6 +196,56 @@ prometheus:
ingressClassName: ""
annotations: {}

# -- Prometheus Helm chart configuration (https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus)
server:
useExistingClusterRoleName: supersonic-prometheus-role
releaseNamespace: true
persistentVolume:
enabled: false
resources:
requests:
cpu: 500m
memory: 512Mi
limits:
cpu: 1
memory: 1Gi
retention: 15d
global:
scrape_interval: 5s
evaluation_interval: 5s
service:
enabled: true
servicePort: 9090
configMapOverrideName: prometheus-config
ingress:
enabled: true
hosts: []
ingressClassName: ""
annotations: {}
tls:
- hosts: []

serviceAccounts:
server:
create: false
name: supersonic-prometheus-sa

rbac:
create: false
alertmanager:
enabled: false
pushgateway:
enabled: false
kube-state-metrics:
enabled: false
prometheus-node-exporter:
enabled: false
prometheus-pushgateway:
enabled: false
configmapReload:
prometheus:
enabled: false

ingress:
enabled: false
hostName: ""
Expand Down Expand Up @@ -244,6 +297,10 @@ grafana:
type: prometheus
access: proxy
isDefault: true
url: http://supersonic-prometheus-server:9090
jsonData:
timeInterval: "5s"
tlsSkipVerify: true

# -- Grafana dashboard providers configuration
dashboardProviders:
Expand Down
35 changes: 16 additions & 19 deletions values/values-nautilus-cms.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ triton:
--exit-timeout-secs=60 \
--backend-config=onnxruntime,enable-global-threadpool=1
resources:
limits: { cpu: 2, memory: 4G, nvidia.com/a100: 1}
requests: { cpu: 2, memory: 4G, nvidia.com/a100: 1}
limits: { cpu: 2, memory: 4G, nvidia.com/gpu: 1}
requests: { cpu: 2, memory: 4G, nvidia.com/gpu: 1}
modelRepository:
enabled: true
storageType: cvmfs-pvc
Expand All @@ -22,15 +22,21 @@ envoy:
enabled: true
loadBalancerPolicy: "ROUND_ROBIN"
prometheus:
enabled: true
external: false
url: prometheus.nrp-nautilus.io
port: 443
scheme: https
serverLoadThreshold: 100
ingress:
enabled: true
hostName: prometheus-cms.nrp-nautilus.io
ingressClassName: haproxy
server:
ingress:
enabled: true
hosts:
- prometheus-cms.nrp-nautilus.io
tls:
- hosts:
- prometheus-cms.nrp-nautilus.io
ingressClassName: haproxy
annotations:
haproxy-ingress.github.io/cors-enable: "true"
haproxy-ingress.github.io/proxy-body-size: "512m"
haproxy-ingress.github.io/timeout-http-request: "5m"

autoscaler:
enabled: true
Expand Down Expand Up @@ -67,15 +73,6 @@ grafana-legacy:
grafana:
enabled: true

datasources:
datasources.yaml:
datasources:
- name: prometheus
type: prometheus
access: proxy
url: https://prometheus.nrp-nautilus.io
isDefault: true

ingress:
enabled: true
hosts:
Expand Down

0 comments on commit e984033

Please sign in to comment.