diff --git a/catalog-info.yaml b/catalog-info.yaml index 0f02883..916cb29 100644 --- a/catalog-info.yaml +++ b/catalog-info.yaml @@ -1,148 +1,4 @@ apiVersion: backstage.io/v1alpha1 -kind: Component -metadata: - name: monitoring-platform-webhook-initializer - namespace: webgrip - title: Webhook Initializer - description: | - The Webhook Initializer service sets up webhooks for the monitoring platform. - labels: - tier: "2" - annotations: - github.com/project-slug: webgrip/webhook-initializer - backstage.io/techdocs-ref: dir:. - backstage.io/kubernetes-label-selector: 'app.kubernetes.io/name=webhook-initializer' - simpleicons.org/icon-slug: githubactions - tags: - - monitoring - - webhook - - automation - links: - - url: https://github.com/webgrip/webhook-initializer - title: Source Code - icon: github - type: source -spec: - type: service - lifecycle: development - owner: group:webgrip/infrastructure - system: monitoring-platform-system ---- -apiVersion: backstage.io/v1alpha1 -kind: Component -metadata: - name: monitoring-platform-github-webhook-receiver - namespace: webgrip - title: GitHub Webhook Receiver - description: | - Receives and processes GitHub webhooks for the monitoring platform. - labels: - tier: "2" - annotations: - github.com/project-slug: webgrip/github-webhook-receiver - backstage.io/techdocs-ref: dir:. - backstage.io/kubernetes-label-selector: 'app.kubernetes.io/name=monitoring-platform-github-webhook-receiver' - simpleicons.org/icon-slug: githubactions - tags: - - monitoring - - github - - webhook - links: - - url: https://github.com/webgrip/github-webhook-receiver - title: Source Code - icon: github - type: source -spec: - type: service - lifecycle: development - owner: group:webgrip/infrastructure - system: monitoring-platform-system ---- -apiVersion: backstage.io/v1alpha1 -kind: Component -metadata: - name: monitoring-platform-grafana-image-renderer - namespace: webgrip - title: Grafana Image Renderer - description: | - Grafana plugin for rendering panels and dashboards as images. - labels: - tier: "2" - annotations: - docker.io/image: grafana/grafana-image-renderer:3.11.0 - backstage.io/techdocs-ref: dir:. - backstage.io/kubernetes-label-selector: 'app.kubernetes.io/name=monitoring-platform-grafana-image-renderer' - simpleicons.org/icon-slug: grafana - tags: - - monitoring - - grafana - - rendering - links: - - url: https://github.com/grafana/grafana-image-renderer - title: Source Code - icon: github - type: source -spec: - type: library - lifecycle: development - owner: group:webgrip/infrastructure - system: monitoring-platform-system ---- -apiVersion: backstage.io/v1alpha1 -kind: Component -metadata: - name: monitoring-platform-ngrok - namespace: webgrip - title: Ngrok - description: | - Ngrok is used to expose the local webhook receiver to the internet for testing. - labels: - tier: "2" - annotations: - docker.io/image: ngrok/ngrok:3.10.0-alpine - backstage.io/techdocs-ref: dir:. - backstage.io/kubernetes-label-selector: 'app.kubernetes.io/name=monitoring-platform-ngrok' - simpleicons.org/icon-slug: ngrok - tags: - - monitoring - - ngrok - - tunneling - links: - - url: https://ngrok.com/ - title: Ngrok Website - icon: link - type: website - - title: Prometheus - url: https://google.ch - icon: prometheus - type: website - - title: Grafana - url: https://google.ch - icon: grafana - type: website - - title: Google BigQuery - url: https://google.ch - icon: googlebigquery - type: website - - title: 1.1.1.1 - url: https://google.ch - icon: 1dot1dot1dot1 - type: website - - title: Github copilot - url: https://google.ch - icon: githubcopilot - type: website - - title: Backstage - url: https://google.ch - icon: backstage - type: website -spec: - type: tool - lifecycle: development - owner: group:webgrip/infrastructure - system: monitoring-platform-system ---- -apiVersion: backstage.io/v1alpha1 kind: System metadata: name: monitoring-platform-system @@ -205,6 +61,36 @@ spec: --- apiVersion: backstage.io/v1alpha1 kind: Component +metadata: + name: monitoring-platform-grafana-image-renderer + namespace: webgrip + title: Grafana Image Renderer + description: | + Grafana plugin for rendering panels and dashboards as images. + labels: + tier: "2" + annotations: + docker.io/image: grafana/grafana-image-renderer:3.11.0 + backstage.io/techdocs-ref: dir:. + backstage.io/kubernetes-label-selector: 'app.kubernetes.io/name=monitoring-platform-grafana-image-renderer' + simpleicons.org/icon-slug: grafana + tags: + - monitoring + - grafana + - rendering + links: + - url: https://github.com/grafana/grafana-image-renderer + title: Source Code + icon: github + type: source +spec: + type: library + lifecycle: development + owner: group:webgrip/infrastructure + system: monitoring-platform-system +--- +apiVersion: backstage.io/v1alpha1 +kind: Component metadata: name: monitoring-platform-loki namespace: webgrip diff --git a/docker-compose.github-runners.yml b/docker-compose.github-runners.yml deleted file mode 100644 index 13954c1..0000000 --- a/docker-compose.github-runners.yml +++ /dev/null @@ -1,46 +0,0 @@ -services: - monitoring-platform-github-webhook-receiver: - container_name: monitoring-platform-github-webhook-receiver - build: - context: ops/github-webhook-receiver - dockerfile: Dockerfile - ports: - - "8000:8000" - environment: - - SERVICE_PUBLIC_URL=http://monitoring-platform-ngrok:4040 - - monitoring-platform-ngrok: - container_name: monitoring-platform-ngrok - image: ngrok/ngrok:3.10.0-alpine - #restart: unless-stopped - command: - - "start" - - "--all" - - "--config" - - "/etc/ngrok.yml" - volumes: - - ./ops/ngrok/ngrok.yml:/etc/ngrok.yml - ports: - - 4040:4040 - env_file: - - ops/ngrok/.env - - monitoring-platform-node_exporter: - container_name: monitoring-platform-node_exporter - image: prom/node-exporter:v1.8.1 - ports: - - "9100:9100" - - monitoring-platform-prometheus: - container_name: monitoring-platform-prometheus - image: prom/prometheus:v2.52.0 - volumes: - - ./ops/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml - - ./ops/prometheus/alert.rules.yml:/etc/prometheus/alert.rules.yml - ports: - - "9090:9090" - -networks: - default: - external: true - name: webgrip diff --git a/docker-compose.init.yml b/docker-compose.init.yml deleted file mode 100644 index 0dbbf5a..0000000 --- a/docker-compose.init.yml +++ /dev/null @@ -1,13 +0,0 @@ -services: - webhook-initializer: - container_name: webhook-initializer - build: - context: ops/webhook-initializer - dockerfile: Dockerfile - env_file: - - ops/webhook-initializer/.env - -networks: - default: - external: true - name: webgrip \ No newline at end of file diff --git a/docs/mermaid/full_system.mmd b/docs/mermaid/full_system.mmd index 20c0019..79cadf6 100644 --- a/docs/mermaid/full_system.mmd +++ b/docs/mermaid/full_system.mmd @@ -22,9 +22,6 @@ C4Context Rel(DevelopmentOpenTelemetryCollector, SystemTempo, "Sends traces to") Rel(DevelopmentOpenTelemetryCollector, SystemLoki, "Sends logs to") } - - - } System_Boundary(BoundaryMonitoringPlatform, "Monitoring platform") { diff --git a/ops/github-webhook-receiver/Dockerfile b/ops/github-webhook-receiver/Dockerfile deleted file mode 100644 index dfc7ad5..0000000 --- a/ops/github-webhook-receiver/Dockerfile +++ /dev/null @@ -1,6 +0,0 @@ -FROM node:22-alpine3.20 -WORKDIR /app -COPY src/ . -RUN npm install -EXPOSE 8000 -CMD ["node", "--trace-deprecation", "index.js"] diff --git a/ops/github-webhook-receiver/src/index.js b/ops/github-webhook-receiver/src/index.js deleted file mode 100644 index bce1409..0000000 --- a/ops/github-webhook-receiver/src/index.js +++ /dev/null @@ -1,219 +0,0 @@ -const express = require('express'); -const bodyParser = require('body-parser'); -const client = require('prom-client'); -const app = express(); -const collectDefaultMetrics = client.collectDefaultMetrics; - -// Probe every 5th second. -collectDefaultMetrics({ timeout: 5000 }); - -// Custom metrics -const jobDuration = new client.Histogram({ - name: 'github_actions_job_duration_seconds', - help: 'Duration of GitHub Actions jobs in seconds', - labelNames: ['status', 'job_name', 'client'] -}); - -const jobCounter = new client.Counter({ - name: 'github_actions_jobs_total', - help: 'Total number of GitHub Actions jobs', - labelNames: ['status', 'job_name', 'client'] -}); - -const runningJobsGauge = new client.Gauge({ - name: 'github_actions_running_jobs', - help: 'Number of currently running GitHub Actions jobs', - labelNames: ['client'] -}); - -const latestJobs = new client.Gauge({ - name: 'github_actions_latest_jobs', - help: 'Latest GitHub Actions job statuses', - labelNames: ['status', 'job_name', 'client'] -}); - -// Additional metrics -const prReviewsCounter = new client.Counter({ - name: 'github_pr_reviews_total', - help: 'Total number of pull request reviews', - labelNames: ['action', 'client'] -}); - -const prCommentsCounter = new client.Counter({ - name: 'github_pr_review_comments_total', - help: 'Total number of pull request review comments', - labelNames: ['action', 'client'] -}); - -const issueCommentsCounter = new client.Counter({ - name: 'github_issue_comments_total', - help: 'Total number of issue comments', - labelNames: ['action', 'client'] -}); - -// Update metrics based on webhook events -app.use(bodyParser.json()); - -app.post('/webhook', (req, res) => { - console.log('Received a POST request on /webhook'); - const event = req.body; - - switch (req.headers['x-github-event']) { - case 'ping': - console.log('Received ping event'); - break; - case 'workflow_run': - handleWorkflowRun(event); - break; - case 'workflow_job': - handleWorkflowJob(event); - break; - case 'check_run': - handleCheckRun(event); - break; - case 'push': - handlePush(event); - break; - case 'pull_request': - handlePullRequest(event); - break; - case 'pull_request_review': - handlePullRequestReview(event); - break; - case 'pull_request_review_comment': - console.log('Handling pull_request_review_comment event'); - handlePullRequestReviewComment(event); - break; - case 'issues': - console.log('Handling issues event'); - handleIssues(event); - break; - case 'issue_comment': - console.log('Handling issue_comment event'); - handleIssueComment(event); - break; - default: - console.log(`Unhandled event type: ${req.headers['x-github-event']}`); - } - - res.status(200).send('Webhook received'); - console.log('Webhook processed successfully'); -}); - -function handleWorkflowRun(event) { - if (event.action === 'completed') { - const { workflow_run } = event; - const clientName = workflow_run.repository.owner.login; - const jobName = workflow_run.name; - const status = workflow_run.conclusion; - const duration = (new Date(workflow_run.updated_at) - new Date(workflow_run.created_at)) / 1000; - - jobDuration.labels(status, jobName, clientName).observe(duration); - jobCounter.labels(status, jobName, clientName).inc(); - runningJobsGauge.labels(clientName).dec(); - latestJobs.labels(status, jobName, clientName).setToCurrentTime(); - } else if (event.action === 'requested') { - const { workflow_run } = event; - const clientName = workflow_run.repository.owner.login; - runningJobsGauge.labels(clientName).inc(); - } -} - -function handleWorkflowJob(event) { - if (event.action === 'completed') { - const { workflow_job, repository } = event; - const clientName = repository.owner.login; - const jobName = workflow_job.name; - const status = workflow_job.conclusion; - const duration = (new Date(workflow_job.completed_at) - new Date(workflow_job.started_at)) / 1000; - - jobDuration.labels(status, jobName, clientName).observe(duration); - jobCounter.labels(status, jobName, clientName).inc(); - runningJobsGauge.labels(clientName).dec(); - latestJobs.labels(status, jobName, clientName).setToCurrentTime(); - } else if (event.action === 'queued') { - const { workflow_job } = event; - const clientName = workflow_job.repository.owner.login; - runningJobsGauge.labels(clientName).inc(); - } -} - -function handleCheckRun(event) { - if (event.action === 'completed') { - const { check_run } = event; - const clientName = check_run.repository.owner.login; - const jobName = check_run.name; - const status = check_run.conclusion; - const duration = (new Date(check_run.completed_at) - new Date(check_run.started_at)) / 1000; - - jobDuration.labels(status, jobName, clientName).observe(duration); - jobCounter.labels(status, jobName, clientName).inc(); - } -} - -function handlePush(event) { - if (event.ref && event.commits) { - const clientName = event.repository.owner.login; - const jobName = 'push'; - const status = 'success'; - const duration = 0; // Push events don't have a duration - - jobDuration.labels(status, jobName, clientName).observe(duration); - jobCounter.labels(status, jobName, clientName).inc(); - latestJobs.labels(status, jobName, clientName).setToCurrentTime(); - } -} - -function handlePullRequest(event) { - if (event.action) { - const clientName = event.repository.owner.login; - prReviewsCounter.labels(event.action, clientName).inc(); - } -} - -function handlePullRequestReview(event) { - if (event.action) { - const clientName = event.repository.owner.login; - prReviewsCounter.labels(event.action, clientName).inc(); - } -} - -function handlePullRequestReviewComment(event) { - if (event.action) { - const clientName = event.repository.name; - prCommentsCounter.labels(event.action, clientName).inc(); - - // Log the details of the comment - console.log(`Pull request review comment event: ${event.action}`); - console.log(`Comment details: ${JSON.stringify(event.comment, null, 2)}`); - } -} - -function handleIssues(event) { - if (event.action) { - const clientName = event.repository.name; - issueCommentsCounter.labels(event.action, clientName).inc(); - } -} - -function handleIssueComment(event) { - if (event.action) { - const clientName = event.repository.name; - issueCommentsCounter.labels(event.action, clientName).inc(); - - // Log the details of the comment - console.log(`Issue comment event: ${event.action}`); - console.log(`Comment details: ${JSON.stringify(event.comment, null, 2)}`); - } -} - -app.get('/metrics', async (req, res) => { - console.log('Received a GET request on /metrics'); - res.set('Content-Type', client.register.contentType); - res.end(await client.register.metrics()); - console.log('Metrics sent successfully'); -}); - -app.listen(8000, () => { - console.log('Webhook receiver listening on port 8000'); -}); \ No newline at end of file diff --git a/ops/github-webhook-receiver/src/package.json b/ops/github-webhook-receiver/src/package.json deleted file mode 100644 index 43a9cf0..0000000 --- a/ops/github-webhook-receiver/src/package.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "name": "github-webhook-receiver", - "version": "1.0.0", - "main": "index.js", - "dependencies": { - "axios": "^1.7.2", - "body-parser": "^1.20.2", - "express": "^4.19.2", - "prom-client": "^15.1.2" - } -} diff --git a/ops/ngrok/.env.example b/ops/ngrok/.env.example deleted file mode 100644 index b61ec46..0000000 --- a/ops/ngrok/.env.example +++ /dev/null @@ -1,2 +0,0 @@ -NGROK_AUTHTOKEN= -NGROK_API_KEY= diff --git a/ops/ngrok/ngrok.yml b/ops/ngrok/ngrok.yml deleted file mode 100644 index 5d373f3..0000000 --- a/ops/ngrok/ngrok.yml +++ /dev/null @@ -1,13 +0,0 @@ -version: "2" -connect_timeout: 30s -console_ui: true -dns_resolver_ips: - - 1.1.1.1 - - 8.8.8.8 -heartbeat_interval: 1m -heartbeat_tolerance: 5s -web_addr: 0.0.0.0:4040 -tunnels: - monitoring-platform-github-webhook-receiver: - addr: monitoring-platform-github-webhook-receiver:8000 - proto: http \ No newline at end of file diff --git a/ops/prometheus/alert.rules.yml b/ops/prometheus/alert.rules.yml deleted file mode 100644 index b26383b..0000000 --- a/ops/prometheus/alert.rules.yml +++ /dev/null @@ -1,47 +0,0 @@ -groups: - - name: GitHubActionsAlerts - rules: - - alert: HighJobDuration - expr: histogram_quantile(0.95, sum(rate(github_actions_job_duration_seconds_bucket[5m])) by (le, job_name)) > 30 - for: 5m - labels: - severity: critical - annotations: - summary: "High Job Duration" - description: "Job '{{ $labels.job_name }}' duration is too high." - - - alert: HighBuildFailureRate - expr: sum(rate(github_actions_jobs_total{status="failure"}[5m])) by (job_name) / sum(rate(github_actions_jobs_total[5m])) by (job_name) > 0.2 - for: 5m - labels: - severity: warning - annotations: - summary: "High Build Failure Rate" - description: "The failure rate for job '{{ $labels.job_name }}' is above 20%." - - - alert: HighQueueTime - expr: avg(rate(github_actions_job_queue_time_seconds_sum[5m])) by (job_name) > 60 - for: 5m - labels: - severity: warning - annotations: - summary: "High Queue Time" - description: "The queue time for job '{{ $labels.job_name }}' is above 60 seconds." - - - alert: HighCPUUsage - expr: 100 - (avg by (instance) (rate(node_cpu_seconds_total{mode="idle"}[1m])) * 100) > 80 - for: 5m - labels: - severity: warning - annotations: - summary: "High CPU Usage" - description: "CPU usage is over 80% for more than 5 minutes." - - - alert: HighMemoryUsage - expr: node_memory_Active_bytes / node_memory_MemTotal_bytes * 100 > 80 - for: 5m - labels: - severity: warning - annotations: - summary: "High Memory Usage" - description: "Memory usage is over 80% for more than 5 minutes." diff --git a/ops/prometheus/prometheus.yml b/ops/prometheus/prometheus.yml deleted file mode 100644 index 0cbfa08..0000000 --- a/ops/prometheus/prometheus.yml +++ /dev/null @@ -1,22 +0,0 @@ -global: - scrape_interval: 15s - evaluation_interval: 15s - scrape_timeout: 10s - -scrape_configs: - - job_name: 'node-exporter' - static_configs: - - targets: ['node_exporter:9100'] - - - job_name: 'monitoring-platform-github-webhook-receiver' - static_configs: - - targets: ['monitoring-platform-github-webhook-receiver:8000'] - -rule_files: - - "alert.rules.yml" - -alerting: - alertmanagers: - - static_configs: - - targets: - - 'localhost:9093' # Alertmanager endpoint \ No newline at end of file diff --git a/ops/webhook-initializer/.env.example b/ops/webhook-initializer/.env.example deleted file mode 100644 index 036ec79..0000000 --- a/ops/webhook-initializer/.env.example +++ /dev/null @@ -1,2 +0,0 @@ -GITHUB_TOKEN= -GITHUB_ORG= \ No newline at end of file diff --git a/ops/webhook-initializer/Dockerfile b/ops/webhook-initializer/Dockerfile deleted file mode 100644 index 736dffa..0000000 --- a/ops/webhook-initializer/Dockerfile +++ /dev/null @@ -1,5 +0,0 @@ -FROM python:3.12.3-alpine3.20 -WORKDIR /app -COPY src/ . -RUN pip install --no-cache-dir -r requirements.txt -CMD ["python", "main.py"] \ No newline at end of file diff --git a/ops/webhook-initializer/src/main.py b/ops/webhook-initializer/src/main.py deleted file mode 100644 index 2145fdd..0000000 --- a/ops/webhook-initializer/src/main.py +++ /dev/null @@ -1,62 +0,0 @@ -import os -import requests -import json -import time - -# Replace with your GitHub organization name -GITHUB_ORG = os.getenv('GITHUB_ORG') -# Replace with your personal access token -GITHUB_TOKEN = os.getenv('GITHUB_TOKEN') -# Get ngrok public URL -def get_ngrok_url(): - url = 'http://monitoring-platform-ngrok:4040/api/tunnels' - print("Waiting for ngrok to start...") - while True: - try: - response = requests.get(url) - if response.status_code == 200: - data = response.json() - for tunnel in data['tunnels']: - print(f"Found tunnel: {tunnel['name']}") - if tunnel['name'] == 'monitoring-platform-github-webhook-receiver': - return tunnel['public_url'] - except Exception as e: - print(f"Waiting for ngrok to start: {e}") - time.sleep(5) - -# Create the GitHub webhook -def add_org_webhook(org, token, webhook_url): - url = f"https://api.github.com/orgs/{org}/hooks" - headers = { - "Authorization": f"token {token}", - "Content-Type": "application/json" - } - data = { - "name": "web", - "active": True, - "events": [ - "workflow_run", - "workflow_job", - "check_run", - "push", - "pull_request", - "pull_request_review", - "pull_request_review_comment", - "issues", - "issue_comment" - ], - "config": { - "url": webhook_url, - "content_type": "json" - } - } - response = requests.post(url, headers=headers, data=json.dumps(data)) - if response.status_code in [200, 201]: - print(f"Successfully added webhook to organization {org}") - else: - print(f"Failed to add webhook to organization {org}: {response.status_code} {response.text}") - -if __name__ == "__main__": - public_url = get_ngrok_url() - print(f"ngrok public URL: {public_url}/webhook") - add_org_webhook(GITHUB_ORG, GITHUB_TOKEN, f"{public_url}/webhook") diff --git a/ops/webhook-initializer/src/requirements.txt b/ops/webhook-initializer/src/requirements.txt deleted file mode 100644 index 663bd1f..0000000 --- a/ops/webhook-initializer/src/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -requests \ No newline at end of file