From 0558fcd63431a35cae74695fb06f7ed9a8744dd3 Mon Sep 17 00:00:00 2001
From: Alex Demidoff
Date: Mon, 30 Sep 2024 00:44:14 +0300
Subject: [PATCH 1/2] PMM-9315 Inegrate Loki with PMM

---
 .../ansible/roles/loki/files/Dockerfile.loki  |  24 ++
 build/ansible/roles/loki/files/loki.sh        | 215 ++++++++++++++
 build/ansible/roles/loki/files/nginx.conf     |  63 +++++
 build/ansible/roles/loki/files/pmm.conf       | 264 ++++++++++++++++++
 build/ansible/roles/loki/tasks/main.yml       |   7 +
 5 files changed, 573 insertions(+)
 create mode 100644 build/ansible/roles/loki/files/Dockerfile.loki
 create mode 100755 build/ansible/roles/loki/files/loki.sh
 create mode 100644 build/ansible/roles/loki/files/nginx.conf
 create mode 100644 build/ansible/roles/loki/files/pmm.conf
 create mode 100644 build/ansible/roles/loki/tasks/main.yml

diff --git a/build/ansible/roles/loki/files/Dockerfile.loki b/build/ansible/roles/loki/files/Dockerfile.loki
new file mode 100644
index 0000000000..429d903be6
--- /dev/null
+++ b/build/ansible/roles/loki/files/Dockerfile.loki
@@ -0,0 +1,24 @@
+# TODO: the image should be percona/pmm-server:3 once PMM v3 is released.
+
+# To build the image, run the following in the project root directory:
+# docker buildx build --progress plain -t perconalab/pmm-server:loki-3.2 -f ./build/ansible/roles/loki/files/Dockerfile.loki . # mind the dot
+FROM perconalab/pmm-server:3-dev-latest
+
+USER root
+
+RUN sed -i '/^assumeyes/d' /etc/dnf/dnf.conf
+
+COPY build/ansible/roles/loki/files/loki.sh /tmp/
+COPY build/ansible/roles/loki/files/pmm.conf /etc/nginx/conf.d/pmm.conf
+COPY build/ansible/roles/loki/files/nginx.conf /etc/nginx/nginx.conf
+RUN chown pmm:pmm /etc/nginx/conf.d/pmm.conf /etc/nginx/nginx.conf
+RUN chmod +x /tmp/loki.sh && /bin/bash -e /tmp/loki.sh && rm /tmp/loki.sh
+
+USER pmm
+
+VOLUME /srv
+
+ENV GF_ANALYTICS_CHECK_FOR_UPDATES=false
+ENV GF_ANALYTICS_REPORTING_ENABLED=false
+ENV GF_SECURITY_DISABLE_GRAVATAR=true
+ENV GF_UNIFIED_ALERTING_ENABLED=true
diff --git a/build/ansible/roles/loki/files/loki.sh b/build/ansible/roles/loki/files/loki.sh
new file mode 100755
index 0000000000..bb1cb984ba
--- /dev/null
+++ b/build/ansible/roles/loki/files/loki.sh
@@ -0,0 +1,215 @@
+#!/bin/bash -e
+
+# Add the Grafana RPM repository (disabled by default; enabled explicitly for the install below)
+cat <<EOF > /etc/yum.repos.d/grafana.repo
+[grafana]
+name=grafana
+baseurl=https://rpm.grafana.com
+repo_gpgcheck=1
+enabled=0
+gpgcheck=1
+gpgkey=https://rpm.grafana.com/gpg.key
+sslverify=1
+sslcacert=/etc/pki/tls/certs/ca-bundle.crt
+EOF
+
+# Install loki and promtail, which also creates dedicated users
+dnf install -y --disablerepo="*" --enablerepo=grafana loki promtail
+
+# Add promtail and loki users to pmm user group
+usermod -a -G pmm promtail
+usermod -a -G pmm loki
+
+mkdir -p /srv/loki
+chown pmm:pmm /srv/loki
+
+cat <<EOF > /etc/supervisord.d/loki.ini
+[program:loki]
+priority = 20
+command =
+    /usr/bin/loki
+        -config.file /etc/loki/config.yml
+user = pmm
+autorestart = true
+autostart = true
+startretries = 1000
+startsecs = 3
+stopsignal = TERM
+stopwaitsecs = 10
+stdout_logfile = /srv/logs/loki.log
+stdout_logfile_maxbytes = 10MB
+stdout_logfile_backups = 3
+redirect_stderr = true
+
+
+[program:promtail]
+priority = 21
+command =
+    /usr/bin/promtail
+        -config.file /srv/loki/promtail.yml
+user = pmm
+autorestart = true
+autostart = true
+startretries = 1000
+startsecs = 3
+stopsignal = TERM
+stopwaitsecs = 10
+stdout_logfile = /srv/logs/promtail.log
+stdout_logfile_maxbytes = 10MB
+stdout_logfile_backups = 3
+redirect_stderr = true
+EOF
+
+cat <<EOF > /etc/loki/config.yml
+auth_enabled: false
+
+server:
+  http_listen_port: 3100
+  grpc_listen_port: 9096
+  log_level: debug
+  grpc_server_max_concurrent_streams: 1000
+
+common:
+  instance_addr: 127.0.0.1
+  path_prefix: /srv/loki
+  storage:
+    filesystem:
+      chunks_directory: /srv/loki/chunks
+      rules_directory: /srv/loki/rules
+  replication_factor: 1
+  ring:
+    kvstore:
+      store: inmemory
+
+ingester_rf1:
+  enabled: false
+
+query_range:
+  results_cache:
+    cache:
+      embedded_cache:
+        enabled: true
+        max_size_mb: 100
+
+schema_config:
+  configs:
+    - from: 2020-10-24
+      store: tsdb
+      object_store: filesystem
+      schema: v13
+      index:
+        prefix: index_
+        period: 24h
+
+pattern_ingester:
+  enabled: true
+  metric_aggregation:
+    enabled: true
+    loki_address: 127.0.0.1:3100
+
+ruler:
+  alertmanager_url: http://127.0.0.1:9093
+
+frontend:
+  encoding: protobuf
+
+analytics:
+  reporting_enabled: false
+EOF
+
+cat <<EOF > /srv/loki/promtail.yml
+# Important: too much scraping during init process can overload the system.
+# https://github.com/grafana/loki/issues/11398
+
+server:
+  http_listen_port: 9080
+  grpc_listen_port: 0
+
+positions:
+  filename: /tmp/positions.yaml
+
+clients:
+- url: http://127.0.0.1:3100/loki/api/v1/push
+
+scrape_configs:
+- job_name: nginx
+  static_configs:
+  - targets:
+      - 127.0.0.1
+    labels:
+      job: nginx
+      __path__: /srv/logs/nginx.log
+
+- job_name: grafana
+  static_configs:
+  - targets:
+      - 127.0.0.1
+    labels:
+      job: grafana
+      __path__: /srv/logs/grafana.log
+
+- job_name: pmm-agent
+  static_configs:
+  - targets:
+      - 127.0.0.1
+    labels:
+      job: pmm-agent
+      __path__: /srv/logs/pmm-agent.log
+      node_name: pmm-server
+
+- job_name: pmm-managed
+  static_configs:
+  - targets:
+      - 127.0.0.1
+    labels:
+      job: pmm-managed
+      __path__: /srv/logs/pmm-managed.log
+
+- job_name: qan
+  static_configs:
+  - targets:
+      - 127.0.0.1
+    labels:
+      job: qan
+      __path__: /srv/logs/qan-api2.log
+
+- job_name: victoriametrics
+  static_configs:
+  - targets:
+      - 127.0.0.1
+    labels:
+      job: victoriametrics
+      __path__: /srv/logs/victoriametrics.log
+
+- job_name: clickhouse
+  static_configs:
+  - targets:
+      - 127.0.0.1
+    labels:
+      job: clickhouse
+      __path__: /srv/logs/clickhouse-server.log
+
+- job_name: supervisor
+  static_configs:
+  - targets:
+      - 127.0.0.1
+    labels:
+      job: supervisor
+      __path__: /srv/logs/supervisord.log
+EOF
+
+cat <<EOF > /usr/share/grafana/conf/provisioning/datasources/loki.yml
+apiVersion: 1
+datasources:
+  - name: Loki
+    type: loki
+    uid: loki
+    access: proxy
+    url: http://127.0.0.1:3100
+EOF
+
+# Hand every file written above to pmm, the user both supervisord programs are configured to run as
+chown pmm:pmm /etc/supervisord.d/loki.ini
+chown pmm:pmm /etc/loki/config.yml
+chown pmm:pmm /srv/loki/promtail.yml
+chown pmm:pmm /usr/share/grafana/conf/provisioning/datasources/loki.yml
diff --git a/build/ansible/roles/loki/files/nginx.conf b/build/ansible/roles/loki/files/nginx.conf
new file mode 100644
index 0000000000..e9c9c67fa5
--- /dev/null
+++ b/build/ansible/roles/loki/files/nginx.conf
@@ -0,0 +1,63 @@
+# user pmm; ## It's ignored when the master process is not run by root.
+worker_processes 2;
+
+daemon off;
+
+error_log /dev/stderr warn;
+pid /run/nginx.pid;
+
+events {
+    worker_connections 4096;
+}
+
+http {
+    map $http_upgrade $connection_upgrade {
+        default upgrade;
+        '' close;
+    }
+
+    include /etc/nginx/mime.types;
+    default_type application/octet-stream;
+
+    log_format main '$remote_addr - $remote_user [$time_local] "$request" '
+                    '$status $body_bytes_sent "$http_referer" '
+                    '"$http_user_agent" "$http_x_forwarded_for"';
+
+    log_format json_format escape=json '{'
+        '"time_local": "$time_local",'
+        '"host": "$http_host",'
+        '"remote_addr": "$remote_addr",'
+        '"request_method": "$request_method",'
+        '"request": "$request_uri",'
+        '"request_time": "$request_time",'
+        '"body_bytes_sent": "$body_bytes_sent",'
+        '"server_protocol": "$server_protocol",'
+        '"status": "$status",'
+        '"http_referrer":"$http_referer",'
+        '"http_x_forwarded_for": "$http_x_forwarded_for",'
+        '"http_user_agent": "$http_user_agent"'
+    '}';
+
+    access_log /dev/stdout json_format;
+
+    sendfile on;
+    gzip on;
+    etag on;
+
+    keepalive_timeout 65;
+
+    resolver 8.8.8.8 8.8.4.4 valid=300s;
+    resolver_timeout 3s;
+
+    ## TODO https://jira.percona.com/browse/PMM-4670
+    # CWE-693, CWE-16
+    add_header X-Frame-Options DENY;
+    add_header X-Content-Type-Options nosniff;
+    # TODO X-XSS-Protection useless for modern browsers which support CSP. We need to implement CSP instead.
+    add_header X-XSS-Protection "1; mode=block";
+    # CWE-524, CWE-525
+    add_header Cache-control "no-cache";
+    add_header Pragma "no-cache";
+
+    include /etc/nginx/conf.d/*.conf;
+}
diff --git a/build/ansible/roles/loki/files/pmm.conf b/build/ansible/roles/loki/files/pmm.conf
new file mode 100644
index 0000000000..448fc0c244
--- /dev/null
+++ b/build/ansible/roles/loki/files/pmm.conf
@@ -0,0 +1,264 @@
+  upstream managed-grpc {
+    server 127.0.0.1:7771;
+    keepalive 32;
+  }
+  upstream managed-json {
+    server 127.0.0.1:7772;
+    keepalive 32;
+    keepalive_requests 100;
+    keepalive_timeout 75s;
+  }
+
+  upstream qan-api-grpc {
+    server 127.0.0.1:9911;
+    keepalive 32;
+  }
+  upstream qan-api-json {
+    server 127.0.0.1:9922;
+    keepalive 32;
+    keepalive_requests 100;
+    keepalive_timeout 75s;
+  }
+
+  upstream vmproxy {
+    server localhost:8430;
+    keepalive 32;
+    keepalive_requests 100;
+    keepalive_timeout 75s;
+  }
+
+  upstream loki {
+    server 127.0.0.1:3100;
+    keepalive 32;
+  }
+
+  server {
+    listen 8080;
+    listen 8443 ssl http2;
+    server_name _;
+    server_tokens off;
+
+    # allow huge requests
+    large_client_header_buffers 128 64k;
+
+    client_max_body_size 10m;
+
+    ssl_certificate /srv/nginx/certificate.crt;
+    ssl_certificate_key /srv/nginx/certificate.key;
+    ssl_trusted_certificate /srv/nginx/ca-certs.pem;
+    ssl_dhparam /srv/nginx/dhparam.pem;
+
+    # this block checks for maintenance.html file and, if it exists, it redirects all requests to the maintenance page
+    # there are two exceptions for it /v1/updates/Status and /auth_request endpoints
+    set $maintenance_mode 0;
+
+    if (-f /usr/share/pmm-server/maintenance/maintenance.html) {
+      set $maintenance_mode 1;
+    }
+
+    if ($request_uri ~* "^/v1/updates/Status|^/auth_request") {
+      set $maintenance_mode 0;
+    }
+
+    if ($maintenance_mode = 1) {
+      return 503;
+    }
+
+    error_page 503 @maintenance;
+
+    location @maintenance {
+      auth_request off;
+      root /usr/share/pmm-server/maintenance;
+      rewrite ^(.*)$ /maintenance.html break;
+    }
+
+
+    # Enable passing of the remote user's IP address to all
+    # proxied services using the X-Forwarded-For header
+    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+
+    # Enable auth_request for all locations, including root
+    # (but excluding /auth_request).
+    auth_request /auth_request;
+
+    # Store the value of X-Proxy-Filter header of auth_request subrequest response in the variable.
+    auth_request_set $auth_request_proxy_filter $upstream_http_x_proxy_filter;
+    proxy_set_header X-Proxy-Filter $auth_request_proxy_filter;
+
+    # nginx completely ignores auth_request subrequest response body.
+    # We use that directive to send the same request to the same location as a normal request
+    # to get a response body or redirect and return it to the client.
+    # auth_request supports only 401 and 403 statuses; 401 is reserved for this configuration,
+    # and 403 is used for normal pmm-managed API errors.
+    error_page 401 = /auth_request;
+
+    # Internal location for authentication via pmm-managed/Grafana.
+    # First, nginx sends request there to authenticate it. If it is not authenticated by pmm-managed/Grafana,
+    # it is sent to this location for the second time (as a normal request) by error_page directive above.
+    location /auth_request {
+      internal;
+
+      auth_request off;
+
+      proxy_pass http://managed-json/auth_request;
+
+      # nginx always strips body from authentication subrequests.
+      # Overwrite Content-Length to avoid problems on Go side and to keep connection alive.
+      proxy_pass_request_body off;
+      proxy_set_header Content-Length 0;
+
+      proxy_http_version 1.1;
+      proxy_set_header Connection "";
+
+      # Those headers are set for both subrequest and normal request.
+      proxy_set_header X-Original-Uri $request_uri;
+      proxy_set_header X-Original-Method $request_method;
+    }
+
+    # PMM UI
+    location /pmm-ui {
+      # Will redirect on FE to login page if user is not authenticated
+      auth_request off;
+
+      alias /usr/share/pmm-ui;
+      try_files $uri /index.html break;
+    }
+
+    # Grafana
+    rewrite ^/$ $scheme://$http_host/graph/;
+    rewrite ^/graph$ /graph/;
+    location /graph {
+      proxy_cookie_path / "/;";
+      proxy_pass http://127.0.0.1:3000;
+      rewrite ^/graph/(.*) /$1 break;
+      proxy_read_timeout 600;
+      proxy_http_version 1.1;
+      proxy_set_header Connection $connection_upgrade;
+      proxy_set_header Upgrade $http_upgrade;
+      proxy_set_header Host $http_host;
+      proxy_set_header X-Proxy-Filter $auth_request_proxy_filter;
+    }
+
+    # Prometheus
+    location /prometheus {
+      proxy_pass http://127.0.0.1:9090;
+      proxy_read_timeout 600;
+      proxy_http_version 1.1;
+      proxy_set_header Connection "";
+    }
+    location /prometheus/api/v1 {
+      proxy_pass http://vmproxy;
+      proxy_read_timeout 600;
+      proxy_http_version 1.1;
+      proxy_set_header Connection "";
+    }
+
+    # VictoriaMetrics
+    location /victoriametrics/ {
+      proxy_pass http://127.0.0.1:9090/prometheus/;
+      proxy_read_timeout 600;
+      proxy_http_version 1.1;
+      proxy_set_header Connection "";
+      client_body_buffer_size 10m;
+    }
+
+    # VMAlert
+    location /prometheus/rules {
+      proxy_pass http://127.0.0.1:8880/api/v1/rules;
+      proxy_read_timeout 600;
+      proxy_http_version 1.1;
+      proxy_set_header Connection "";
+    }
+    location /prometheus/alerts {
+      proxy_pass http://127.0.0.1:8880/api/v1/alerts;
+      proxy_read_timeout 600;
+      proxy_http_version 1.1;
+      proxy_set_header Connection "";
+    }
+
+    # Loki: only the log push endpoint is proxied (exact-match location)
+    location = /loki/api/v1/push {
+      proxy_pass http://loki$request_uri;
+    }
+
+    # Swagger UI
+    rewrite ^/swagger/swagger.json$ $scheme://$http_host/swagger.json permanent;
+    rewrite ^(/swagger)/(.*)$ $scheme://$http_host/swagger permanent;
+    location /swagger {
+      auth_request off;
+      root /usr/share/pmm-managed/swagger;
+      try_files $uri /index.html break;
+    }
+
+    # pmm-managed gRPC APIs
+    location /agent. {
+      grpc_pass grpc://managed-grpc;
+      # Disable request body size check for gRPC streaming, see https://trac.nginx.org/nginx/ticket/1642.
+      # pmm-managed uses grpc.MaxRecvMsgSize for that.
+      client_max_body_size 0;
+    }
+    location /inventory. {
+      grpc_pass grpc://managed-grpc;
+    }
+    location /management. {
+      grpc_pass grpc://managed-grpc;
+    }
+    location /server. {
+      grpc_pass grpc://managed-grpc;
+    }
+
+    # pmm-managed JSON APIs
+    location /v1/ {
+      proxy_pass http://managed-json/v1/;
+      proxy_http_version 1.1;
+      proxy_set_header Connection "";
+    }
+
+    # qan-api gRPC APIs should not be exposed
+
+    # qan-api JSON APIs
+    location /v1/qan {
+      proxy_pass http://qan-api-json/v1/qan;
+      proxy_http_version 1.1;
+      proxy_set_header Connection "";
+    }
+
+    # compatibility with PMM 1.x
+    rewrite ^/ping$ /v1/server/readyz;
+    # compatibility with PMM 2.x
+    rewrite ^/v1/readyz$ /v1/server/readyz;
+    rewrite ^/v1/version$ /v1/server/version;
+    rewrite ^/logs.zip$ /v1/server/logs.zip;
+
+    # logs.zip in both PMM 1.x and 2.x variants
+    location /v1/server/logs.zip {
+      proxy_pass http://managed-json;
+      proxy_http_version 1.1;
+      proxy_set_header Connection "";
+    }
+
+    # pmm-dump artifacts
+    location /dump {
+      alias /srv/dump/;
+    }
+
+    # This location stores static content for general pmm-server purposes.
+    # Ex.: local-rss.xml - contains Percona's news when no internet connection.
+    location /pmm-static {
+      auth_request off;
+      alias /usr/share/pmm-server/static;
+    }
+
+    # proxy requests to the Percona's blog feed
+    # fallback to local rss if pmm-server is isolated from internet.
+    # https://jira.percona.com/browse/PMM-6153
+    location = /percona-blog/feed {
+      auth_request off;
+      proxy_ssl_server_name on;
+
+      set $feed https://www.percona.com/blog/feed/;
+      proxy_pass $feed;
+      proxy_set_header User-Agent "$http_user_agent pmm-server/3.x";
+      error_page 500 502 503 504 /pmm-static/local-rss.xml;
+    }
+  }
diff --git a/build/ansible/roles/loki/tasks/main.yml b/build/ansible/roles/loki/tasks/main.yml
new file mode 100644
index 0000000000..0c394c1434
--- /dev/null
+++ b/build/ansible/roles/loki/tasks/main.yml
@@ -0,0 +1,7 @@
+---
+# This role install and configures Loki (https://github.com/grafana/loki).
+- name: detect /srv/pmm-distribution
+  slurp:
+    path: /srv/pmm-distribution
+  register: pmm_distribution
+  ignore_errors: True

From 8590c197c9f9555b5d0db4ee361f948b98cff53c Mon Sep 17 00:00:00 2001
From: Alex Demidoff
Date: Mon, 30 Sep 2024 00:50:02 +0300
Subject: [PATCH 2/2] PMM-9315 Inegrate Loki with PMM

---
 build/ansible/roles/loki/files/Dockerfile.loki | 2 --
 build/ansible/roles/loki/tasks/main.yml        | 7 +------
 2 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/build/ansible/roles/loki/files/Dockerfile.loki b/build/ansible/roles/loki/files/Dockerfile.loki
index 429d903be6..c76b4d55cf 100644
--- a/build/ansible/roles/loki/files/Dockerfile.loki
+++ b/build/ansible/roles/loki/files/Dockerfile.loki
@@ -20,5 +20,3 @@ VOLUME /srv
 
 ENV GF_ANALYTICS_CHECK_FOR_UPDATES=false
 ENV GF_ANALYTICS_REPORTING_ENABLED=false
-ENV GF_SECURITY_DISABLE_GRAVATAR=true
-ENV GF_UNIFIED_ALERTING_ENABLED=true
diff --git a/build/ansible/roles/loki/tasks/main.yml b/build/ansible/roles/loki/tasks/main.yml
index 0c394c1434..e5e512bd72 100644
--- a/build/ansible/roles/loki/tasks/main.yml
+++ b/build/ansible/roles/loki/tasks/main.yml
@@ -1,7 +1,2 @@
 ---
-# This role install and configures Loki (https://github.com/grafana/loki).
-- name: detect /srv/pmm-distribution
-  slurp:
-    path: /srv/pmm-distribution
-  register: pmm_distribution
-  ignore_errors: True
+# This role installs and configures Loki (https://github.com/grafana/loki).