Skip to content

Commit

Permalink
Merge pull request topolvm#947 from naemono/add-lvmd-011y
Browse files Browse the repository at this point in the history
Introduce prometheus metrics for lvmd process.
  • Loading branch information
toshipp authored Aug 30, 2024
2 parents b273c76 + 79ecad6 commit c49d008
Show file tree
Hide file tree
Showing 6 changed files with 170 additions and 9 deletions.
9 changes: 9 additions & 0 deletions charts/topolvm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,19 @@ See [Getting Started](https://github.com/topolvm/topolvm/blob/topolvm-chart-v15.
| lvmd.labels | object | `{}` | Additional labels to be added to the Daemonset. |
| lvmd.lvcreateOptionClasses | list | `[]` | Specify the lvcreate-option-class settings. |
| lvmd.managed | bool | `true` | If true, set up lvmd service with DaemonSet. |
| lvmd.metrics.annotations | object | `{"prometheus.io/port":"metrics"}` | Annotations for Scrape used by Prometheus. |
| lvmd.metrics.enabled | bool | `true` | If true, enable scraping of metrics by Prometheus. |
| lvmd.nodeSelector | object | `{}` | Specify nodeSelector. # ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ |
| lvmd.podLabels | object | `{}` | Additional labels to be set on the lvmd service pods. |
| lvmd.priorityClassName | string | `nil` | Specify priorityClassName. |
| lvmd.profiling.bindAddress | string | `""` | Enables pprof profiling server. If empty, profiling is disabled. |
| lvmd.prometheus.podMonitor.additionalLabels | object | `{}` | Additional labels that can be used so PodMonitor will be discovered by Prometheus. |
| lvmd.prometheus.podMonitor.enabled | bool | `false` | Set this to `true` to create PodMonitor for Prometheus operator. |
| lvmd.prometheus.podMonitor.interval | string | `""` | Scrape interval. If not set, the Prometheus default scrape interval is used. |
| lvmd.prometheus.podMonitor.metricRelabelings | list | `[]` | MetricRelabelConfigs to apply to samples before ingestion. |
| lvmd.prometheus.podMonitor.namespace | string | `""` | Optional namespace in which to create PodMonitor. |
| lvmd.prometheus.podMonitor.relabelings | list | `[]` | RelabelConfigs to apply to samples before scraping. |
| lvmd.prometheus.podMonitor.scrapeTimeout | string | `""` | Scrape timeout. If not set, the Prometheus default scrape timeout is used. |
| lvmd.socketName | string | `"/run/topolvm/lvmd.sock"` | Specify socketName. |
| lvmd.tolerations | list | `[]` | Specify tolerations. # ref: https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/ |
| lvmd.updateStrategy | object | `{}` | Specify updateStrategy. |
Expand Down
9 changes: 9 additions & 0 deletions charts/topolvm/templates/lvmd/daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ spec:
{{- end }}
annotations:
checksum/config: {{ include (print $.Template.BasePath "/lvmd/configmap.yaml") . | sha256sum }}
{{- /* Add the Prometheus scrape annotations to the pod only when lvmd metrics are enabled. */}}
{{- if .Values.lvmd.metrics.enabled }}
{{- with .Values.lvmd.metrics.annotations }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- end }}
spec:
{{- with .Values.lvmd.priorityClassName }}
priorityClassName: {{ . }}
Expand Down Expand Up @@ -86,6 +91,10 @@ spec:
{{- with .Values.livenessProbe.lvmd.periodSeconds }}
periodSeconds: {{ . }}
{{- end }}
ports:
- name: metrics
containerPort: 8080
protocol: TCP
{{- with .Values.resources.lvmd }}
resources: {{ toYaml . | nindent 12 }}
{{- end }}
Expand Down
37 changes: 37 additions & 0 deletions charts/topolvm/templates/lvmd/podmonitor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{{- if .Values.lvmd.prometheus.podMonitor.enabled }}
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
name: {{ template "topolvm.fullname" . }}-lvmd
namespace: {{ .Values.lvmd.prometheus.podMonitor.namespace | default .Release.Namespace }}
labels:
{{- include "topolvm.labels" . | nindent 4 }}
{{- with .Values.lvmd.prometheus.podMonitor.additionalLabels }}
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
selector:
matchLabels:
app.kubernetes.io/component: lvmd
{{ include "topolvm.selectorLabels" . | nindent 6 }}
namespaceSelector:
matchNames:
- {{ .Release.Namespace }}
podMetricsEndpoints:
- path: /metrics
port: metrics
{{- with .Values.lvmd.prometheus.podMonitor.interval }}
interval: {{ . }}
{{- end }}
{{- with .Values.lvmd.prometheus.podMonitor.scrapeTimeout }}
scrapeTimeout: {{ . }}
{{- end }}
{{- with .Values.lvmd.prometheus.podMonitor.relabelings }}
relabelings:
{{- toYaml . | nindent 6 }}
{{- end }}
{{- with .Values.lvmd.prometheus.podMonitor.metricRelabelings }}
metricRelabelings:
{{- toYaml . | nindent 6 }}
{{- end }}
{{- end }}
40 changes: 40 additions & 0 deletions charts/topolvm/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,46 @@ lvmd:
# lvmd.profiling.bindAddress -- Enables pprof profiling server. If empty, profiling is disabled.
bindAddress: ""

metrics:
# lvmd.metrics.enabled -- If true, enable scraping of metrics by Prometheus.
enabled: true
# lvmd.metrics.annotations -- Annotations for Scrape used by Prometheus.
annotations:
prometheus.io/port: metrics

prometheus:
podMonitor:
# lvmd.prometheus.podMonitor.enabled -- Set this to `true` to create PodMonitor for Prometheus operator.
enabled: false

# lvmd.prometheus.podMonitor.additionalLabels -- Additional labels that can be used so PodMonitor will be discovered by Prometheus.
additionalLabels: {}

# lvmd.prometheus.podMonitor.namespace -- Optional namespace in which to create PodMonitor.
namespace: ""

# lvmd.prometheus.podMonitor.interval -- Scrape interval. If not set, the Prometheus default scrape interval is used.
interval: ""

# lvmd.prometheus.podMonitor.scrapeTimeout -- Scrape timeout. If not set, the Prometheus default scrape timeout is used.
scrapeTimeout: ""

# lvmd.prometheus.podMonitor.relabelings -- RelabelConfigs to apply to samples before scraping.
relabelings: []
# - sourceLabels: [__meta_kubernetes_service_label_cluster]
# targetLabel: cluster
# regex: (.*)
# replacement: ${1}
# action: replace

# lvmd.prometheus.podMonitor.metricRelabelings -- MetricRelabelConfigs to apply to samples before ingestion.
metricRelabelings: []
# - sourceLabels: [__meta_kubernetes_service_label_cluster]
# targetLabel: cluster
# regex: (.*)
# replacement: ${1}
# action: replace

# CSI node service
node:
# node.lvmdEmbedded -- Specify whether to embed lvmd in the node container.
Expand Down
57 changes: 48 additions & 9 deletions cmd/lvmd/app/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,12 @@ import (
"os"
"os/signal"
"path/filepath"
"sync"
"syscall"
"time"

"github.com/go-logr/logr"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/spf13/cobra"
"github.com/topolvm/topolvm"
"github.com/topolvm/topolvm/internal/lvmd"
Expand All @@ -33,6 +36,7 @@ var (
lvmPath string
zapOpts zap.Options
profilingBindAddress string
metricsBindAddress string
)

// rootCmd represents the base command when called without any subcommands
Expand Down Expand Up @@ -112,15 +116,7 @@ func subMain(parentCtx context.Context) error {
ctx, stop := signal.NotifyContext(parentCtx, os.Interrupt, syscall.SIGTERM)
defer stop()

var pprofServer *http.Server
if profilingBindAddress != "" {
pprofServer = profiling.NewProfilingServer(profilingBindAddress)
go func() {
if err := pprofServer.ListenAndServe(); !errors.Is(err, http.ErrServerClosed) {
logger.Error(err, "pprof server error")
}
}()
}
wg, pprofServer, metricsServer := startMetricsAndProfilingServers(logger)

go func() {
ticker := time.NewTicker(10 * time.Minute)
Expand All @@ -133,7 +129,13 @@ func subMain(parentCtx context.Context) error {
logger.Error(err, "failed to shutdown pprof server")
}
}
if metricsServer != nil {
if err := metricsServer.Shutdown(parentCtx); err != nil {
logger.Error(err, "failed to shutdown metrics server")
}
}
grpcServer.GracefulStop()
wg.Wait()
return
case <-ticker.C:
notifier()
Expand All @@ -144,6 +146,42 @@ func subMain(parentCtx context.Context) error {
return grpcServer.Serve(lis)
}

// startMetricsAndProfilingServers starts the pprof profiling server and the
// Prometheus metrics server. Each server is started only when its bind
// address (profilingBindAddress / metricsBindAddress) is non-empty.
//
// It returns, in order:
//   - a wait group that is released once the ListenAndServe call of every
//     started server has returned,
//   - the profiling server, or nil if profiling is disabled,
//   - the metrics server, or nil if metrics are disabled.
//
// The caller is responsible for shutting down the non-nil servers.
func startMetricsAndProfilingServers(logger logr.Logger) (*sync.WaitGroup, *http.Server, *http.Server) {
	var wg sync.WaitGroup

	var pprofServer *http.Server
	if profilingBindAddress != "" {
		pprofServer = profiling.NewProfilingServer(profilingBindAddress)
		wg.Add(1)
		go func() {
			defer wg.Done()
			// ListenAndServe always returns a non-nil error; ErrServerClosed
			// is the expected result of a regular shutdown and is not logged.
			if err := pprofServer.ListenAndServe(); !errors.Is(err, http.ErrServerClosed) {
				logger.Error(err, "pprof server error")
			}
		}()
	}

	var metricsServer *http.Server
	if metricsBindAddress != "" {
		mux := http.NewServeMux()
		mux.Handle("/metrics", promhttp.Handler())
		metricsServer = &http.Server{
			Addr:    metricsBindAddress,
			Handler: mux,
			// Bound the time a client may take to send request headers so
			// that slow or stalled connections cannot be held open forever.
			ReadHeaderTimeout: 30 * time.Second,
		}
		wg.Add(1)
		go func() {
			defer wg.Done()
			if err := metricsServer.ListenAndServe(); !errors.Is(err, http.ErrServerClosed) {
				logger.Error(err, "metrics server error")
			}
		}()
	}

	return &wg, pprofServer, metricsServer
}

// Execute adds all child commands to the root command and sets flags appropriately.
// This is called by main.main(). It only needs to happen once to the rootCmd.
func Execute() {
Expand All @@ -158,6 +196,7 @@ func init() {
rootCmd.PersistentFlags().StringVar(&cfgFilePath, "config", filepath.Join("/etc", "topolvm", "lvmd.yaml"), "config file")
rootCmd.PersistentFlags().StringVar(&lvmPath, "lvm-path", "", "lvm command path on the host OS")
rootCmd.PersistentFlags().StringVar(&profilingBindAddress, "profiling-bind-address", "", "bind address to expose pprof profiling. If empty, profiling is disabled")
rootCmd.PersistentFlags().StringVar(&metricsBindAddress, "metrics-bind-address", ":8080", "bind address to expose prometheus metrics. If empty, metrics are disabled")

goflags := flag.NewFlagSet("klog", flag.ExitOnError)
klog.InitFlags(goflags)
Expand Down
27 changes: 27 additions & 0 deletions test/e2e/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
lvmdApp "github.com/topolvm/topolvm/cmd/lvmd/app"
"github.com/topolvm/topolvm/internal/lvmd"
lvmdTypes "github.com/topolvm/topolvm/pkg/lvmd/types"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
"sigs.k8s.io/yaml"
)
Expand Down Expand Up @@ -143,6 +144,32 @@ func testMetrics() {
}
})
})

Describe("topolvm-lvmd", func() {
It("should open ports for metrics", func() {
managed, err := isLVMManaged()
Expect(err).ShouldNot(HaveOccurred())
if managed {
Eventually(func() error {
_, err := kubectl("exec", "-n", "topolvm-system", "daemonset/topolvm-lvmd-0", "-c=lvmd", "--",
"curl", "http://localhost:8080/metrics")
return err
}).Should(Succeed())
}
})
})
}

// isLVMManaged reports whether the lvmd DaemonSet "topolvm-lvmd-0" can be
// fetched from the "topolvm-system" namespace. A lookup that ends with
// ErrObjectNotFound yields (false, nil); any other lookup failure is
// returned as a wrapped error.
func isLVMManaged() (bool, error) {
	var daemonSet appsv1.DaemonSet
	switch err := getObjects(&daemonSet, "-n", "topolvm-system", "daemonset/topolvm-lvmd-0"); {
	case err == ErrObjectNotFound:
		return false, nil
	case err != nil:
		return false, fmt.Errorf("failed to get DaemonSet: %w", err)
	default:
		return true, nil
	}
}

func getMetricsFamily(nodeIP string) (map[string]*dto.MetricFamily, error) {
Expand Down

0 comments on commit c49d008

Please sign in to comment.