Skip to content

Commit

Permalink
Adds Task Management Infra (#61)
Browse files Browse the repository at this point in the history
* Adds RabbitMQ StatefulSet

* Adds RMQ task system.

* Adds args to rmq server

* Adds the RMQ autoscaler.

* Set image version for rmq-http-bridge.

* Upgrades more ingresses.

* Updates rmq-autoscaler to pass shellcheck

* Adds rmq image cache

* Consolidates namespaces into a region, and adds tasks namespace

* Adds rmq config file

* Adds some things that are needed for the tasks namespace to work

* Gets RMQ ready in the tasks namespace.

* Adds rmq management ingresses

* Adds delay infrastructure init job

* Moves rmq-http-bridge dir

* Updates to add some vars to the http bridge

* Makes clustering work on rmq

* Disable debug logging on rmq

* Updates to deploy autoscaler

* Fix port on rabbitmq management connection string

* Use more modern image for rmq-worker autoscaler

* Make rmq autoscaler use the right deployment + fail faster

* Fix autoscaler script configmap name

* Update rmq bridge image

* Adds in a pvc template for rabbitmq

* Revert gitignore change

* Remove duplicate hope resource

* Updates init job image

* Upgrade rmq bridge server to 2 replicas

* Updates rabbitmq version
  • Loading branch information
Eagerod authored Jun 9, 2024
1 parent 4384858 commit a14d62f
Show file tree
Hide file tree
Showing 8 changed files with 548 additions and 16 deletions.
14 changes: 14 additions & 0 deletions .env.empty
Original file line number Diff line number Diff line change
Expand Up @@ -166,3 +166,17 @@ export WIREGUARD_PEER_1_PUBLIC_KEY="$(wg pubkey <<< "$WIREGUARD_PEER_1_PRIVATE_K
export DEV_SSH_KEY_BASE64="$($BASE64 ~/.ssh/id_rsa)"
export DEV_SSH_PUBLIC_KEY_BASE64="$($BASE64 ~/.ssh/id_rsa.pub)"
export DEV_KNOWN_HOSTS_BASE64="$($BASE64 ~/.ssh/known_hosts)"


# RabbitMQ
# Plain-text RabbitMQ credentials and connection URLs. None of these are
# exported on these lines; the exports further down publish base64-encoded
# copies instead.
RMQ_USERNAME=admin
# A fresh UUID is generated as the password each time this line is evaluated.
RMQ_PASSWORD=$(uuidgen)
# Literal placeholder text; presumably meant to be replaced by a real secret
# before use — TODO confirm.
RMQ_ERLANG_COOKIE=generated-string
# NOTE(review): the portion after "$RMQ_USERNAME:" in both URLs reads
# "[email protected]" in this view, which looks like redaction of the
# password/host part — confirm against the real file.
RMQ_CONNECTION_STRING="amqp://$RMQ_USERNAME:[email protected]"
RMQ_MANAGEMENT_CONNECTION_STRING="http://$RMQ_USERNAME:[email protected]:15672"

# Base64-encoded copies of the RabbitMQ values, exported for substitution into
# the data fields of the rabbitmq-secrets Secret in hope.yaml.
# printf '%s' is used instead of `echo -n`: it never treats the value as an
# option (e.g. a value of "-e" or "-n") and behaves the same in every shell,
# so the encoded bytes are always exactly the value with no trailing newline.
export RMQ_USERNAME_BASE64="$(printf '%s' "$RMQ_USERNAME" | $BASE64)"
export RMQ_PASSWORD_BASE64="$(printf '%s' "$RMQ_PASSWORD" | $BASE64)"
export RMQ_ERLANG_COOKIE_BASE64="$(printf '%s' "$RMQ_ERLANG_COOKIE" | $BASE64)"
export RMQ_CONNECTION_STRING_BASE64="$(printf '%s' "$RMQ_CONNECTION_STRING" | $BASE64)"
export RMQ_MANAGEMENT_CONNECTION_STRING_BASE64="$(printf '%s' "$RMQ_MANAGEMENT_CONNECTION_STRING" | $BASE64)"
114 changes: 98 additions & 16 deletions hope.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,31 @@ nodes:
loglevel: *log_level
pod_network_cidr: 10.244.0.0/16
resources:
# region: Namespaces
# All Namespace objects are grouped here, at the head of the resources list.
- name: load-balancer-namespace
  tags:
    - network
    - load-balancer
  inline: |
    apiVersion: v1
    kind: Namespace
    metadata:
      name: metallb-system
      labels:
        app: metallb
- name: dev-namespace
  tags:
    - rotate-node
  inline: |
    apiVersion: v1
    kind: Namespace
    metadata:
      name: dev
# Namespace that holds the RabbitMQ-based task system.
- name: tasks-namespace
  tags:
    - rmq
    - tasks
  inline: |
    apiVersion: v1
    kind: Namespace
    metadata:
      name: tasks
# endregion
- name: calico
file: calico.yaml
tags: [network]
Expand Down Expand Up @@ -166,15 +191,6 @@ resources:
name: aleem
namespace: kube-system
tags: [users]
- name: load-balancer-namespace
inline: |
apiVersion: v1
kind: Namespace
metadata:
name: metallb-system
labels:
app: metallb
tags: [network, load-balancer]
- name: load-balancer
file: metallb.yaml
tags: [network, load-balancer]
Expand Down Expand Up @@ -434,6 +450,13 @@ resources:
- DOCKER_REGISTRY_HOSTNAME
- DOCKER_CONFIG_JSON_FILE_CONTENTS_BASE64
tags: [apps, registry]
# Registry secrets template rendered with NAMESPACE=tasks.
- name: cluster-registry-secrets-tasks
  tags:
    - apps
    - registry
  file: registry/registry-secrets.yaml
  parameters:
    - NAMESPACE=tasks
    - DOCKER_REGISTRY_HOSTNAME
    - DOCKER_CONFIG_JSON_FILE_CONTENTS_BASE64
- name: docker-registry-htpasswd-secrets
inline: |
apiVersion: v1
Expand Down Expand Up @@ -637,6 +660,12 @@ resources:
pull: *docker_hub_upstream_pull
tag: registry.internal.aleemhaji.com/nginx:1.27.0
tags: [docker-cache, knowledge]
# Re-tags the upstream rabbitmq management image into the internal registry;
# the rabbitmq StatefulSet pulls this exact tag.
- name: rabbitmq-image-cache
  tags:
    - docker-cache
    - rmq
  build:
    source: rabbitmq:3.13.3-management
    tag: registry.internal.aleemhaji.com/rabbitmq:3.13.3-management
    pull: *docker_hub_upstream_pull
# endregion
- name: pihole
file: pihole/pihole.yaml
Expand Down Expand Up @@ -897,6 +926,57 @@ resources:
- name: mongodb
file: mongodb/mongodb.yaml
tags: [apps, mongodb]
# Secret and ConfigMap consumed by the RabbitMQ task system in the tasks
# namespace:
#   - rabbitmq-secrets: broker credentials, Erlang cookie, and connection
#     strings (every value arrives already base64-encoded via parameters).
#   - rmq-bridge-autoscaler-script: the contents of tasks/autoscaler.sh.
# The .erlang-cookie key is read by the rabbitmq StatefulSet as ERLANG_COOKIE,
# so it is filled from RMQ_ERLANG_COOKIE_BASE64 (previously it reused the
# password value and left that declared parameter unused).
- name: rabbitmq-config
  inline: |
    apiVersion: v1
    kind: Secret
    metadata:
      name: rabbitmq-secrets
      namespace: tasks
    data:
      username: ${RMQ_USERNAME_BASE64}
      password: ${RMQ_PASSWORD_BASE64}
      .erlang-cookie: ${RMQ_ERLANG_COOKIE_BASE64}
      connection_string: ${RMQ_CONNECTION_STRING_BASE64}
      management_connection_string: ${RMQ_MANAGEMENT_CONNECTION_STRING_BASE64}
    ---
    apiVersion: v1
    kind: ConfigMap
    metadata:
      name: rmq-bridge-autoscaler-script
      namespace: tasks
    binaryData:
      autoscaler.sh: ${AUTOSCALER_SCRIPT}
  parameters:
    - RMQ_USERNAME_BASE64
    - RMQ_PASSWORD_BASE64
    - RMQ_ERLANG_COOKIE_BASE64
    - RMQ_CONNECTION_STRING_BASE64
    - RMQ_MANAGEMENT_CONNECTION_STRING_BASE64
  fileParameters:
    - AUTOSCALER_SCRIPT=tasks/autoscaler.sh
  tags: [apps, rmq]
# RabbitMQ broker, followed by the HTTP bridge (init manifest, wait-for-job
# step, bridge itself) and the bridge autoscaler (image build, then manifest).
# Entries are kept in their original order.
- name: rabbitmq
  tags:
    - apps
    - rmq
  file: tasks/rabbitmq.yaml
- name: rabbitmq-http-bridge-init
  tags:
    - apps
    - rmq
  file: tasks/rmq-http-bridge-init.yaml
- name: rabbitmq-http-bridge-init-wait
  tags:
    - apps
    - rmq
  job: tasks/rabbitmq-delay-infrastructure-init
- name: rabbitmq-http-bridge
  tags:
    - apps
    - rmq
  file: tasks/rmq-http-bridge.yaml
# Built from the tasks/ directory, which holds the autoscaler Dockerfile.
- name: rabbitmq-http-bridge-autoscaler-image
  tags:
    - apps
    - rmq
  build:
    path: tasks
    tag: registry.internal.aleemhaji.com/rmq-bridge-autoscaler:latest
    pull: *local_upstream_pull
- name: rabbitmq-http-bridge-autoscaler
  tags:
    - apps
    - rmq
  file: tasks/rmq-http-bridge-autoscaler.yaml
# endregion
# region: Database-Dependent Services
# This section contains the listing of services that rely on some form of
Expand Down Expand Up @@ -1140,6 +1220,15 @@ resources:
fileParameters:
- UPDATE_SECRETS_SCRIPT=certbot/certbot-copy-script.sh
tags: [crons, certbot]
# Generic certbot cron template rendered for the tasks namespace, with
# external certificates and the bare domain both switched off.
- name: certbot-update-tasks
  tags:
    - crons
    - certbot
  file: certbot/certbot-generic-cron.yaml
  parameters:
    - KUBERNETES_NAMESPACE=tasks
    - INCLUDE_EXTERNAL_CERTS=false
    - INCLUDE_BARE_DOMAIN=false
  fileParameters:
    - UPDATE_SECRETS_SCRIPT=certbot/certbot-copy-script.sh
- name: certbot-cron
file: certbot/certbot.yaml
fileParameters:
Expand Down Expand Up @@ -1216,13 +1305,6 @@ resources:
fileParameters:
- JOBS_SHELL_MONITOR_SCRIPT=shell-monitor/delete-manual-jobs-monitor.sh
tags: [shell-monitor]
- name: dev-namespace
inline: |
apiVersion: v1
kind: Namespace
metadata:
name: dev
tags: [rotate-node]
- name: home-network-image
build:
path: .
Expand Down
10 changes: 10 additions & 0 deletions tasks/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Image for the RabbitMQ bridge autoscaler: the internal kubectl image with jq
# added, since autoscaler.sh pipes the task server's stats through jq.
FROM registry.internal.aleemhaji.com/kubectl:1.21.0

# Package installation requires root.
USER root

# --no-install-recommends limits the install to jq and its hard dependencies.
# The apt package lists are deleted in the same layer so they never end up in
# the final image.
RUN \
    apt-get update && \
    apt-get install -y --no-install-recommends jq && \
    rm -rf /var/lib/apt/lists/*

# Return to an unprivileged UID for everything that runs in this image.
USER 1001
75 changes: 75 additions & 0 deletions tasks/autoscaler.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#!/usr/bin/env bash
#
# Scales the rmq-http-bridge-worker deployment up or down one replica at a
# time, based on the queue statistics served at "$TASK_SERVER_URL/stats".
#
# Required environment:
#   TASK_SERVER_URL  Base URL of the task server; "/stats" is appended to it.
#
# Any failing command (kubectl, curl, jq) terminates the script, courtesy of
# "set -e" and "pipefail".
set -eufo pipefail

DEPLOYMENT="rmq-http-bridge-worker"
MAX_REPLICAS=6
MIN_REPLICAS=2
MAX_MESSAGES=100

# One character per consecutive reading: "1" for a scale-up signal, "0" for a
# scale-down signal. Cleared whenever a reading produces no signal.
last_result=

# Validate up front, outside of any pipeline. The ":-" default makes the
# message below reachable when the variable is unset as well as when it is
# empty; a bare "$TASK_SERVER_URL" would abort under -u before printing it.
if [ -z "${TASK_SERVER_URL:-}" ]; then
  echo >&2 "Failed to find TASK_SERVER_URL"
  exit 1
fi
if [ -z "$DEPLOYMENT" ]; then
  echo >&2 "Failed to find DEPLOYMENT"
  exit 1
fi

# Prints the replica count the deployment is configured for.
# .spec.replicas is read rather than .status.replicas because this script
# writes the desired count with "kubectl scale": the status field counts live
# pods (so it can include rollout surge pods) and is omitted at zero, either
# of which would corrupt the arithmetic and comparisons below.
desired_replicas() {
  kubectl get deployment "$DEPLOYMENT" -o template='{{.spec.replicas}}'
}

current_replicas=$(desired_replicas)

# Re-read the replica count once every 3 minutes (36 iterations of 5 seconds),
# just in case anything weird happened.
i=0

# Conditions for an up-scale/down-scale event:
#   If sustained flow with the current rates for 1 minute results in >=
#     MAX_MESSAGES, and the current number of replicas is < MAX_REPLICAS, add
#     a replica.
#   If sustained flow with the current rates for 1 minute results in <= 0
#     messages, and the current number of replicas is > MIN_REPLICAS,
#     eliminate a replica.
#   Otherwise, do nothing.
# In every case, require 2 readings of the same result to trigger an event.
while true; do
  # Projected queue depth one minute from now, truncated to its integer part.
  r=$(curl -fsSL "$TASK_SERVER_URL/stats" | jq '.Messages + (.InRate - .OutRate) * 60' | awk -F. '{print $1}')
  if [ "$r" -ge "$MAX_MESSAGES" ] && [ "$current_replicas" -lt "$MAX_REPLICAS" ]; then
    last_result="${last_result}1"
  elif [ "$r" -le 0 ] && [ "$current_replicas" -gt "$MIN_REPLICAS" ]; then
    last_result="${last_result}0"
  else
    last_result=""
  fi

  # Wait an extra 5 seconds after a scaling event.
  if [ "$last_result" = "11" ]; then
    echo >&2 "Scaling up the current number of replicas ($current_replicas) by 1"
    current_replicas=$((current_replicas + 1))
    kubectl scale deployment "$DEPLOYMENT" --replicas="$current_replicas"
    last_result=""
    sleep 5
  elif [ "$last_result" = "00" ]; then
    echo >&2 "Scaling down the current number of replicas ($current_replicas) by 1"
    current_replicas=$((current_replicas - 1))
    kubectl scale deployment "$DEPLOYMENT" --replicas="$current_replicas"
    last_result=""
    sleep 5
  elif [ ${#last_result} -ge 2 ]; then
    # Two readings that disagree ("10" or "01").
    echo >&2 "High volatility recently. Resetting measurements."
    last_result=""
  fi

  sleep 5
  i=$((i + 1))
  if [ "$i" -ge 36 ]; then
    # Periodic resync with the cluster in case the count was changed elsewhere.
    new_replicas=$(desired_replicas)
    if [ "$new_replicas" -ne "$current_replicas" ]; then
      current_replicas=$new_replicas
      echo >&2 "Restored number of replicas to $current_replicas"
    fi
    i=0
  fi
done
124 changes: 124 additions & 0 deletions tasks/rabbitmq.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# Two-replica RabbitMQ StatefulSet in the tasks namespace. Each replica gets
# its own 1Gi volume claim, mounted at /var/lib/rabbitmq.
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: rabbitmq
namespace: tasks
spec:
revisionHistoryLimit: 0
replicas: 2
selector:
matchLabels:
app: rabbitmq
serviceName: rabbitmq
template:
metadata:
labels:
app: rabbitmq
spec:
imagePullSecrets:
- name: registry.internal.aleemhaji.com
containers:
- name: rabbitmq
image: registry.internal.aleemhaji.com/rabbitmq:3.13.3-management
ports:
- containerPort: 5672
- containerPort: 15672
# Startup script: writes a two-entry classic_config peer list to
# /var/lib/rabbitmq/rabbitmq.conf, enables the shovel plugins offline, then
# starts the server with a node name derived from $HOSTNAME and the cookie
# taken from the ERLANG_COOKIE variable.
# NOTE(review): the two node names passed to printf read "[email protected]" in
# this view, which looks like redaction — confirm against the real file.
command:
- sh
- -eufc
- |
printf 'cluster_formation.classic_config.nodes.1 = %s\ncluster_formation.classic_config.nodes.2 = %s\n' \
[email protected] [email protected] > /var/lib/rabbitmq/rabbitmq.conf
rabbitmq-plugins --offline enable rabbitmq_shovel rabbitmq_shovel_management
RABBITMQ_NODENAME=rabbit@$HOSTNAME.rabbitmq rabbitmq-server --erlang-cookie "${ERLANG_COOKIE}"
# Credentials and the Erlang cookie all come from the rabbitmq-secrets Secret.
env:
- name: RABBITMQ_DEFAULT_USER
valueFrom:
secretKeyRef:
name: rabbitmq-secrets
key: username
- name: RABBITMQ_DEFAULT_PASS
valueFrom:
secretKeyRef:
name: rabbitmq-secrets
key: password
- name: ERLANG_COOKIE
valueFrom:
secretKeyRef:
name: rabbitmq-secrets
key: .erlang-cookie
- name: RABBITMQ_USE_LONGNAME
value: "true"
# Same path the startup script writes the generated config to.
- name: RABBITMQ_CONFIG_FILE
value: /var/lib/rabbitmq/rabbitmq.conf
volumeMounts:
- name: rabbitmq-persistent-storage
mountPath: /var/lib/rabbitmq
# NOTE(review): this secret volume has no matching volumeMounts entry in the
# container above; the cookie reaches the server through the ERLANG_COOKIE
# env var instead — confirm whether this volume is still needed.
volumes:
- name: rabbitmq-erlang-cookie
secret:
secretName: rabbitmq-secrets
volumeClaimTemplates:
- metadata:
name: rabbitmq-persistent-storage
spec:
accessModes: [ReadWriteOnce]
storageClassName: freenas-iscsi
resources:
requests:
storage: 1Gi
---
# Service in front of the rabbitmq pods: port 5672 (named rmq) and port 15672
# (named management).
# NOTE(review): the StatefulSet names this Service as its serviceName; per-pod
# DNS names normally require that Service to be headless (clusterIP: None) —
# confirm that peer discovery resolves as intended.
apiVersion: v1
kind: Service
metadata:
  name: rabbitmq
  namespace: tasks
  labels:
    app: rabbitmq
spec:
  selector:
    app: rabbitmq
  ports:
    - name: rmq
      port: 5672
      protocol: TCP
    - name: management
      port: 15672
      protocol: TCP
---
# Sends requests for the short hostnames to the canonical HTTPS management
# URL via the nginx temporal-redirect annotation.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: rabbitmq-management-ingress-redirect
  namespace: tasks
  annotations:
    kubernetes.io/ingress.class: nginx
    nginx.ingress.kubernetes.io/temporal-redirect: https://rabbitmq-management.internal.aleemhaji.com
spec:
  rules:
    - host: rabbitmq-management
    - host: rabbitmq-management.home
---
# TLS ingress for the RabbitMQ management UI: every path on the management
# hostname is routed to port 15672 of the rabbitmq Service.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: rabbitmq-management-ingress
  namespace: tasks
  annotations:
    kubernetes.io/ingress.class: nginx
spec:
  tls:
    - secretName: internal-certificate-files
      hosts:
        - rabbitmq-management.internal.aleemhaji.com
  rules:
    - host: rabbitmq-management.internal.aleemhaji.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: rabbitmq
                port:
                  number: 15672
Loading

0 comments on commit a14d62f

Please sign in to comment.