Skip to content

Commit

Permalink
Creates a managed Namespace mechanism for consistent tooling (#62)
Browse files Browse the repository at this point in the history
* Moves namespace definitions to a dedicated file

* Moves registry secrets into namespace definition

* Moves certbot copy into managed namespaces

* Adds the pod-killer to managed namespaces

* Moved job cleaner to managed namespaces
  • Loading branch information
Eagerod authored Jun 12, 2024
1 parent 3086709 commit 73d1af9
Show file tree
Hide file tree
Showing 11 changed files with 412 additions and 411 deletions.
86 changes: 0 additions & 86 deletions certbot/certbot-generic-cron.yaml

This file was deleted.

193 changes: 94 additions & 99 deletions hope.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -138,29 +138,102 @@ loglevel: *log_level
pod_network_cidr: 10.244.0.0/16
resources:
# region: Namespaces
- name: load-balancer-namespace
inline: |
apiVersion: v1
kind: Namespace
metadata:
name: metallb-system
labels:
app: metallb
tags: [network, load-balancer]
- name: default-namespace
file: infra/namespace.yaml
parameters:
- NAMESPACE=default
- DOCKER_REGISTRY_HOSTNAME
- DOCKER_CONFIG_JSON_FILE_CONTENTS_BASE64
- SLACK_BOT_ALERTING_CHANNEL
- INCLUDE_EXTERNAL_CERTS=true
- INCLUDE_BARE_DOMAIN=true
- POD_KILLER_CONTAINER_RESTART_LIMIT=10
- JOB_RETENTION_WINDOW=1 month
fileParameters:
- UPDATE_SECRETS_SCRIPT=infra/certbot-copy-script.sh
- POD_KILLER_SCRIPT=infra/pod-killer-script.sh
- JOBS_SHELL_MONITOR_SCRIPT=infra/delete-manual-jobs-script.sh
tags: [namespaces]
- name: dev-namespace
inline: |
apiVersion: v1
kind: Namespace
metadata:
name: dev
tags: [rotate-node]
file: infra/namespace.yaml
parameters:
- NAMESPACE=dev
- DOCKER_REGISTRY_HOSTNAME
- DOCKER_CONFIG_JSON_FILE_CONTENTS_BASE64
- SLACK_BOT_ALERTING_CHANNEL
- INCLUDE_EXTERNAL_CERTS=false
- INCLUDE_BARE_DOMAIN=false
- POD_KILLER_CONTAINER_RESTART_LIMIT=10
- JOB_RETENTION_WINDOW=1 month
fileParameters:
- UPDATE_SECRETS_SCRIPT=infra/certbot-copy-script.sh
- POD_KILLER_SCRIPT=infra/pod-killer-script.sh
- JOBS_SHELL_MONITOR_SCRIPT=infra/delete-manual-jobs-script.sh
tags: [namespaces, rotate-node]
- name: monitoring-namespace
file: infra/namespace.yaml
parameters:
- NAMESPACE=monitoring
- DOCKER_REGISTRY_HOSTNAME
- DOCKER_CONFIG_JSON_FILE_CONTENTS_BASE64
- SLACK_BOT_ALERTING_CHANNEL
- INCLUDE_EXTERNAL_CERTS=false
- INCLUDE_BARE_DOMAIN=false
- POD_KILLER_CONTAINER_RESTART_LIMIT=10
- JOB_RETENTION_WINDOW=1 month
fileParameters:
- UPDATE_SECRETS_SCRIPT=infra/certbot-copy-script.sh
- POD_KILLER_SCRIPT=infra/pod-killer-script.sh
- JOBS_SHELL_MONITOR_SCRIPT=infra/delete-manual-jobs-script.sh
tags: [namespaces, monitoring]
- name: kube-system-namespace
file: infra/namespace.yaml
parameters:
- NAMESPACE=kube-system
- DOCKER_REGISTRY_HOSTNAME
- DOCKER_CONFIG_JSON_FILE_CONTENTS_BASE64
- SLACK_BOT_ALERTING_CHANNEL
- INCLUDE_EXTERNAL_CERTS=false
- INCLUDE_BARE_DOMAIN=false
- POD_KILLER_CONTAINER_RESTART_LIMIT=10
- JOB_RETENTION_WINDOW=1 month
fileParameters:
- UPDATE_SECRETS_SCRIPT=infra/certbot-copy-script.sh
- POD_KILLER_SCRIPT=infra/pod-killer-script.sh
- JOBS_SHELL_MONITOR_SCRIPT=infra/delete-manual-jobs-script.sh
tags: [namespaces]
- name: kubernetes-dashboard-namespace
file: infra/namespace.yaml
parameters:
- NAMESPACE=kubernetes-dashboard
- DOCKER_REGISTRY_HOSTNAME
- DOCKER_CONFIG_JSON_FILE_CONTENTS_BASE64
- SLACK_BOT_ALERTING_CHANNEL
- INCLUDE_EXTERNAL_CERTS=false
- INCLUDE_BARE_DOMAIN=false
- POD_KILLER_CONTAINER_RESTART_LIMIT=10
- JOB_RETENTION_WINDOW=1 month
fileParameters:
- UPDATE_SECRETS_SCRIPT=infra/certbot-copy-script.sh
- POD_KILLER_SCRIPT=infra/pod-killer-script.sh
- JOBS_SHELL_MONITOR_SCRIPT=infra/delete-manual-jobs-script.sh
tags: [namespaces]
- name: tasks-namespace
inline: |
apiVersion: v1
kind: Namespace
metadata:
name: tasks
tags: [rmq, tasks]
file: infra/namespace.yaml
parameters:
- NAMESPACE=tasks
- DOCKER_REGISTRY_HOSTNAME
- DOCKER_CONFIG_JSON_FILE_CONTENTS_BASE64
- SLACK_BOT_ALERTING_CHANNEL
- INCLUDE_EXTERNAL_CERTS=false
- INCLUDE_BARE_DOMAIN=false
- POD_KILLER_CONTAINER_RESTART_LIMIT=10
- JOB_RETENTION_WINDOW=1 month
fileParameters:
- UPDATE_SECRETS_SCRIPT=infra/certbot-copy-script.sh
- POD_KILLER_SCRIPT=infra/pod-killer-script.sh
- JOBS_SHELL_MONITOR_SCRIPT=infra/delete-manual-jobs-script.sh
tags: [namespaces, rmq, tasks]
# endregion
- name: calico
file: calico.yaml
Expand Down Expand Up @@ -422,41 +495,6 @@ resources:
port:
number: 443
tags: [apps, dashboard]
- name: cluster-registry-secrets-default
file: registry/registry-secrets.yaml
parameters:
- NAMESPACE=default
- DOCKER_REGISTRY_HOSTNAME
- DOCKER_CONFIG_JSON_FILE_CONTENTS_BASE64
tags: [apps, registry]
- name: cluster-registry-secrets-dev
file: registry/registry-secrets.yaml
parameters:
- NAMESPACE=dev
- DOCKER_REGISTRY_HOSTNAME
- DOCKER_CONFIG_JSON_FILE_CONTENTS_BASE64
tags: [apps, registry]
- name: cluster-registry-secrets-monitoring
file: registry/registry-secrets.yaml
parameters:
- NAMESPACE=monitoring
- DOCKER_REGISTRY_HOSTNAME
- DOCKER_CONFIG_JSON_FILE_CONTENTS_BASE64
tags: [apps, registry]
- name: cluster-registry-secrets-kube-system
file: registry/registry-secrets.yaml
parameters:
- NAMESPACE=kube-system
- DOCKER_REGISTRY_HOSTNAME
- DOCKER_CONFIG_JSON_FILE_CONTENTS_BASE64
tags: [apps, registry]
- name: cluster-registry-secrets-tasks
file: registry/registry-secrets.yaml
parameters:
- NAMESPACE=tasks
- DOCKER_REGISTRY_HOSTNAME
- DOCKER_CONFIG_JSON_FILE_CONTENTS_BASE64
tags: [apps, registry]
- name: docker-registry-htpasswd-secrets
inline: |
apiVersion: v1
Expand Down Expand Up @@ -1191,49 +1229,6 @@ resources:
file: drone/drone.yaml
tags: [apps, drone]
# endregion
- name: pod-killer
file: pod-killer/pod-killer.yaml
parameters:
- POD_KILLER_NAMESPACE=default
- SLACK_BOT_ALERTING_CHANNEL
fileParameters:
- POD_KILLER_SCRIPT=pod-killer/pod-killer.sh
- name: certbot-update-kubernetes-dashboard
file: certbot/certbot-generic-cron.yaml
parameters:
- KUBERNETES_NAMESPACE=kubernetes-dashboard
- INCLUDE_EXTERNAL_CERTS=false
- INCLUDE_BARE_DOMAIN=false
fileParameters:
- UPDATE_SECRETS_SCRIPT=certbot/certbot-copy-script.sh
tags: [crons, certbot]
- name: certbot-update-monitoring
file: certbot/certbot-generic-cron.yaml
parameters:
- KUBERNETES_NAMESPACE=monitoring
- INCLUDE_EXTERNAL_CERTS=false
- INCLUDE_BARE_DOMAIN=false
fileParameters:
- UPDATE_SECRETS_SCRIPT=certbot/certbot-copy-script.sh
tags: [crons, certbot]
- name: certbot-update-default
file: certbot/certbot-generic-cron.yaml
parameters:
- KUBERNETES_NAMESPACE=default
- INCLUDE_EXTERNAL_CERTS=true
- INCLUDE_BARE_DOMAIN=true
fileParameters:
- UPDATE_SECRETS_SCRIPT=certbot/certbot-copy-script.sh
tags: [crons, certbot]
- name: certbot-update-tasks
file: certbot/certbot-generic-cron.yaml
parameters:
- KUBERNETES_NAMESPACE=tasks
- INCLUDE_EXTERNAL_CERTS=false
- INCLUDE_BARE_DOMAIN=false
fileParameters:
- UPDATE_SECRETS_SCRIPT=certbot/certbot-copy-script.sh
tags: [crons, certbot]
- name: certbot-cron
file: certbot/certbot.yaml
fileParameters:
Expand Down
7 changes: 7 additions & 0 deletions infra/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Various templates and such for managing things that keep the cluster neat.

Each namespace ships with:
- A set of docker registry secrets
- A `CronJob` to copy SSL certs from filesystem to Kubernetes `Secret`
- A `CronJob` to kill pods whose containers have more than `POD_KILLER_CONTAINER_RESTART_LIMIT` restarts (set to 10 for every namespace)
- A `CronJob` to delete successful, manually run jobs older than a configurable age
File renamed without changes.
39 changes: 39 additions & 0 deletions infra/delete-manual-jobs-script.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/usr/bin/env sh
#
# Delete jobs that appear to be CronJobs that have been run manually from the
# Kubernetes Dashboard.
# Exit on command failure (-e) and on use of unset variables (-u); turn off
# pathname expansion (-f).
set -euf

# Endpoint the slack helper below posts to.
SLACK_URL="https://slackbot.internal.aleemhaji.com/message"

# POST a message to SLACK_URL, addressed to the channel named by the
# SLACK_BOT_ALERTING_CHANNEL environment variable (the script aborts under
# `set -u` if it is unset).
#
# Arguments are handed to curl unchanged: the first one becomes the request
# body, any further ones are treated by curl as additional options.
slack() {
  curl \
    --silent --show-error \
    --request POST \
    --header "X-SLACK-CHANNEL-ID: ${SLACK_BOT_ALERTING_CHANNEL}" \
    --data "$@" \
    "$SLACK_URL"
}

# Exactly two arguments are required: the namespace to clean up and the
# maximum age of a job, phrased so that "<age> ago" is a valid relative date
# for `date -d` (for example "1 month").
if [ $# -ne 2 ]; then
  echo >&2 "Usage:"
  echo >&2 " $0 <namespace> <age>"
  exit 1
fi

namespace="$1"
age_str="$2"

# Matches names containing "-manual-" followed by 3 to 5 alphanumeric
# characters. The trailing whitespace class ties the match to the end of the
# NAME column in the kubectl table output.
MANUAL_JOB_REGEXP='-manual-[[:alnum:]]\{3,5\}[[:space:]]'
# One row per job: name, succeeded count, completion time.
JOBS_COLUMNS='custom-columns=NAME:{.metadata.name},SUCCEEDED:{.status.succeeded},COMPLETED:{.status.completionTime}'
# Prints the job name when the job succeeded exactly once and its completion
# timestamp sorts before the cutoff passed in as "arg". The timestamps share a
# fixed-width ISO-8601 layout, so plain string comparison orders them.
# shellcheck disable=SC2016
AWK_SCRIPT='{if ($2 == 1 && $3 < arg) print $1}'

# Print the UTC timestamp that lies "$age_str" before the current time, in the
# same layout as the COMPLETED column. Depends on `date -d` understanding
# relative dates (GNU date does).
cutoff_timestamp() {
  date -u -d "$age_str ago" '+%Y-%m-%dT%H:%M:%SZ'
}

# Fail fast (via `set -e`) on an age that `date` cannot parse, before anything
# is announced to Slack.
cutoff_timestamp > /dev/null

slack 'Manual job-run cleanup running on '"$(hostname)"'.
Deleting successful manually run jobs in namespace "'"$namespace"'" older than '"$age_str"'.'

while true; do
  echo "Run: $(date)"

  # Recompute the cutoff on every pass. This loop never exits, so a cutoff
  # taken once at start-up would stop advancing and jobs that age past the
  # window later on would never be picked up.
  cutoff="$(cutoff_timestamp)"

  kubectl -n "$namespace" get jobs -o "$JOBS_COLUMNS" \
    | sed '1d' \
    | grep -- "$MANUAL_JOB_REGEXP" \
    | awk -v "arg=$cutoff" "$AWK_SCRIPT" \
    | while read -r job; do
      slack "Job monitor deleting old manually run job: $job"
      kubectl -n "${namespace}" delete job "$job"
    done

  # Wait an hour between passes.
  sleep 3600
done
Loading

0 comments on commit 73d1af9

Please sign in to comment.