Skip to content

Commit

Permalink
Gdb 6524 implement cluster scalability (#40)
Browse files Browse the repository at this point in the history
* Changed graphdb-utils-configmap update before any jobs

* Changed graphdb-node's service to always be headless, fixing a bug with scaling up from 1 node

* Cluster scale up, down, patch added

* Added waits on proxy to be ready before executing curl commands against it.

* Made scale up take precedence over patch and reimplemented patch job

* Changed waitAllNodes in graphdb.sh to go from last to first node so it's in the direction of a rolling update

* Updated CHANGELOG

* Formatted graphdb.sh

* Change chart and graphdb version to 10.0.1

* Updated documentation to use 10.0 instead of M3 links
  • Loading branch information
Sukhumi authored Jul 27, 2022
1 parent 5d5bc20 commit c63660a
Show file tree
Hide file tree
Showing 12 changed files with 362 additions and 99 deletions.
13 changes: 12 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
# GraphDB Helm chart release notes
## Version 10.0
## Version 10.0.1

### Breaking
- The graphdb-node service is now always headless. If you installed version 10.0.0 with `graphdb.clusterConfig.nodesCount` set to `1`, you will have to delete the service before upgrading.

### New
- Upgrade to GraphDB 10.0.1
- Cluster size can now be scaled
- Fixed an issue with deploying with security turned on
- Fixed an issue with the cluster proxy returning its internal address when queried externally

## Version 10.0.0

### Breaking
New major release that isn't compatible with the old chart, due to major breaking changes in GraphDB 10.
Expand Down
4 changes: 2 additions & 2 deletions Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ apiVersion: v2
name: graphdb
description: Helm chart for GraphDB
type: application
version: 10.0.0
appVersion: 10.0.0
version: 10.0.1
appVersion: 10.0.1
home: https://graphdb.ontotext.com/
icon: https://graphdb.ontotext.com/home/images/visual_Logo_GraphDB_02_12_2015.png
maintainers:
Expand Down
124 changes: 62 additions & 62 deletions README.md

Large diffs are not rendered by default.

43 changes: 13 additions & 30 deletions files/scripts/graphdb.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,34 +4,16 @@ set -eu
function createCluster {
waitAllNodes $1 $3
local configLocation=$2
local token=$3
local authToken=$3
local timeout=$4
echo "Creating cluster"
curl -o response.json -isSL -m $timeout -X POST --header "Authorization: Basic ${token}" --header 'Content-Type: application/json' --header 'Accept: */*' -d @"$configLocation" http://graphdb-node-0.graphdb-node:7200/rest/cluster/config
if grep -q 'HTTP/1.1 201' "response.json"; then
echo "Cluster creation successful!"
else if grep -q 'Cluster already exists.\|HTTP/1.1 409' "response.json" ; then
echo "Cluster already exists"
else
echo "Cluster creation failed, received response:"
cat response.json
echo
exit 1
fi
fi
}

function updateCluster {
#curl to leader/loadBalancer to update cluster
echo "Not implemented yet."
}

function deleteCluster {
curl -o response.json -isSL -m 15 -X DELETE --header 'Accept: */*' 'http://graphdb-node:7200/rest/cluster/config?force=false'
if grep -q 'HTTP/1.1 200' "response.json"; then
echo "Cluster deletion successful!"
curl -o response.json -isSL -m $timeout -X POST --header "Authorization: Basic ${authToken}" --header 'Content-Type: application/json' --header 'Accept: */*' -d @"$configLocation" http://graphdb-node-0.graphdb-node:7200/rest/cluster/config
if grep -q 'HTTP/1.1 201' "response.json"; then
echo "Cluster creation successful!"
elif grep -q 'Cluster already exists.\|HTTP/1.1 409' "response.json" ; then
echo "Cluster already exists"
else
echo "Cluster deletion failed, received response:"
echo "Cluster creation failed, received response:"
cat response.json
echo
exit 1
Expand All @@ -40,13 +22,13 @@ function deleteCluster {

function waitService {
local address=$1
local token=$2
local authToken=$2

local attempt_counter=0
local max_attempts=100

echo "Waiting for ${address}"
until $(curl --output /dev/null -fsSL -m 5 -H "Authorization: Basic ${token}" --silent --fail ${address}); do
until $(curl --output /dev/null -fsSL -m 5 -H "Authorization: Basic ${authToken}" --silent --fail ${address}); do
if [[ ${attempt_counter} -eq ${max_attempts} ]];then
echo "Max attempts reached"
exit 1
Expand All @@ -60,12 +42,13 @@ function waitService {

function waitAllNodes {
local node_count=$1
local token=$2
local authToken=$2

for (( c=0; c<$node_count; c++ ))
for (( c=$node_count; c>0; c ))
do
c=$((c-1))
local node_address=http://graphdb-node-$c.graphdb-node:7200
waitService "${node_address}/rest/repositories" "$token"
waitService "${node_address}/rest/repositories" "$authToken"
done
}

Expand Down
142 changes: 142 additions & 0 deletions files/scripts/update-cluster.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
#!/usr/bin/env bash
# Helper functions for changing an existing GraphDB cluster: patching its
# configuration, adding nodes, removing nodes and deleting the cluster.
# Usage: update-cluster.sh <functionName> [args...]
# (the function is dispatched through the "$@" line at the end of this file).
#
# -e: abort on the first unhandled failing command.
# -u: treat expansion of an unset variable as an error.
set -eu

#######################################
# Sends the cluster configuration file to the cluster proxy as a PATCH request.
# Arguments:
#   $1 - path to the JSON file with the cluster configuration
#   $2 - value for the "Authorization: Basic" header
# Outputs:
#   Progress messages on stdout; the raw response is stored in
#   patchResponse.json in the current directory.
# Returns:
#   0 when the proxy answers 200, or when it reports that no cluster exists
#   (412); exits the script with status 1 on any other response.
#######################################
patchCluster() {
  local cfg_file=$1
  local token=$2
  local proxy_url='http://graphdb-cluster-proxy:7200'
  local reply_file='patchResponse.json'
  local -a request=(
    -o "$reply_file" -isSL -m 15 -X PATCH
    --header "Authorization: Basic ${token}"
    --header 'Content-Type: application/json'
    --header 'Accept: application/json'
    -d @"$cfg_file"
    "${proxy_url}/rest/cluster/config"
  )

  echo "Patching cluster"
  # The proxy must report ready before it is asked to change the cluster.
  waitService "${proxy_url}/proxy/ready" "$token"
  curl "${request[@]}"

  # -i stores the response headers in the file, so the status line can be grepped.
  if grep -q 'HTTP/1.1 200' "$reply_file"; then
    echo "Patch successful"
    return
  fi
  if grep -q 'Cluster does not exist.\|HTTP/1.1 412' "$reply_file"; then
    echo "Cluster does not exist"
    return
  fi

  echo "Cluster patch failed, received response:"
  cat "$reply_file"
  echo
  exit 1
}

#######################################
# Scales the cluster down to the expected number of nodes.
# Nodes with the highest ordinals are removed through the cluster proxy. When
# the target size is below 2 the whole cluster is deleted instead.
# Arguments:
#   $1 - expected number of nodes after scaling
#   $2 - value for the "Authorization: Basic" header
#   $3 - Kubernetes namespace, used to build the nodes' RPC addresses
# Outputs:
#   Progress messages on stdout, diagnostics on stderr; the raw response is
#   stored in clusterRemove.json in the current directory.
# Returns:
#   Exits 0 when nothing has to be done or the cluster was deleted; exits 1 when
#   the node counts are not valid numbers or the removal request fails.
#######################################
function removeNodes {
  local expectedNodes=$1
  local authToken=$2
  local namespace=$3
  local currentNodes
  local -a addresses=()
  local nodes
  local i

  # The helper's exit status is deliberately ignored: it can be non-zero for a
  # valid count of 0. Its output is validated below instead, so a failed lookup
  # (which yields text or nothing rather than a number) cannot slip through.
  currentNodes=$(getNodeCountInCurrentCluster "$authToken") || true
  if ! [[ "$expectedNodes" =~ ^[0-9]+$ && "$currentNodes" =~ ^[0-9]+$ ]]; then
    echo "Unable to determine cluster size (expected: '${expectedNodes}', current: '${currentNodes}')" >&2
    exit 1
  fi

  echo "Cluster reported: $currentNodes current nodes"
  echo "Cluster is expected to have: $expectedNodes nodes"
  # No cluster (fewer than 2 nodes) or already at/below the target: nothing to remove.
  if [ "$currentNodes" -lt 2 ] || [ "$currentNodes" -le "$expectedNodes" ]; then
    echo "No scaling down of the cluster required"
    exit 0
  fi
  # A cluster exists (checked above) and the target is a single node: delete the cluster.
  if [ "$expectedNodes" -lt 2 ]; then
    echo "Scaling down to 1 node. Deleting cluster"
    deleteCluster "$authToken"
    exit 0
  fi

  echo "Scaling the cluster down"
  # Collect the addresses of the nodes with ordinals expectedNodes..currentNodes-1.
  for ((i = expectedNodes; i < currentNodes; i++)); do
    addresses+=("\"graphdb-node-${i}.graphdb-node.${namespace}.svc.cluster.local:7300\"")
  done
  # Join with commas in a subshell so the IFS change does not leak.
  nodes=$(IFS=,; printf '{"nodes":[%s]}' "${addresses[*]}")

  waitService "http://graphdb-cluster-proxy:7200/proxy/ready" "$authToken"
  curl -o clusterRemove.json -isSL -m 15 -X DELETE \
    --header 'Content-Type: application/json' \
    --header 'Accept: application/json' \
    --header "Authorization: Basic ${authToken}" \
    -d "${nodes}" \
    'http://graphdb-cluster-proxy:7200/rest/cluster/config/node'
  if grep -q 'HTTP/1.1 200' "clusterRemove.json"; then
    echo "Scaling down successful."
  else
    echo "Issue scaling down:"
    cat clusterRemove.json
    echo
    exit 1
  fi
}

#######################################
# Scales the cluster up to the expected number of nodes by adding the nodes
# with ordinals currentNodes..expectedNodes-1 through the cluster proxy.
# Arguments:
#   $1 - expected number of nodes after scaling
#   $2 - value for the "Authorization: Basic" header
#   $3 - Kubernetes namespace, used to build the nodes' RPC addresses
#   $4 - maximum time in seconds for the add request (curl -m)
# Outputs:
#   Progress messages on stdout, diagnostics on stderr; the raw response is
#   stored in clusterAdd.json in the current directory.
# Returns:
#   Exits 0 when nothing has to be done; exits 1 when the node counts are not
#   valid numbers or the add request fails.
#######################################
function addNodes {
  local expectedNodes=$1
  local authToken=$2
  local namespace=$3
  local timeout=$4
  local currentNodes
  local -a addresses=()
  local nodes
  local i

  # The helper's exit status is deliberately ignored: it can be non-zero for a
  # valid count of 0. Its output is validated below instead, so a failed lookup
  # (which yields text or nothing rather than a number) cannot slip through.
  currentNodes=$(getNodeCountInCurrentCluster "$authToken") || true
  if ! [[ "$expectedNodes" =~ ^[0-9]+$ && "$currentNodes" =~ ^[0-9]+$ ]]; then
    echo "Unable to determine cluster size (expected: '${expectedNodes}', current: '${currentNodes}')" >&2
    exit 1
  fi

  echo "Cluster reported: $currentNodes current nodes"
  echo "Cluster is expected to have: $expectedNodes nodes"
  # No cluster (fewer than 2 nodes) or already at/above the target: nothing to add.
  if [ "$currentNodes" -lt 2 ] || [ "$currentNodes" -ge "$expectedNodes" ]; then
    echo "No scaling up of the cluster required"
    exit 0
  fi

  echo "Scaling the cluster up"
  for ((i = currentNodes; i < expectedNodes; i++)); do
    addresses+=("\"graphdb-node-${i}.graphdb-node.${namespace}.svc.cluster.local:7300\"")
  done
  # Join with commas in a subshell so the IFS change does not leak.
  nodes=$(IFS=,; printf '{"nodes":[%s]}' "${addresses[*]}")

  waitService "http://graphdb-cluster-proxy:7200/proxy/ready" "$authToken"
  curl -o clusterAdd.json -isSL -m "${timeout}" -X POST \
    --header 'Content-Type: application/json' \
    --header 'Accept: application/json' \
    --header "Authorization: Basic ${authToken}" \
    -d "${nodes}" \
    'http://graphdb-cluster-proxy:7200/rest/cluster/config/node'
  if grep -q 'HTTP/1.1 200' "clusterAdd.json"; then
    echo "Scaling successful."
    return
  fi

  echo "Issue scaling:"
  cat clusterAdd.json
  echo
  # A 412 / fingerprint mismatch needs operator action, so say so explicitly.
  if grep -q 'Mismatching fingerprints\|HTTP/1.1 412' "clusterAdd.json"; then
    echo "Manual clear of the mismatched repositories will be required to add the node"
  fi
  exit 1
}

#######################################
# Deletes the cluster configuration through node 0 (non-forced delete).
# Arguments:
#   $1 - value for the "Authorization: Basic" header
# Outputs:
#   Progress messages on stdout; the raw response is stored in response.json
#   in the current directory.
# Returns:
#   0 when the node answers 200, or when it reports that it is not part of a
#   cluster (412); exits the script with status 1 on any other response.
#######################################
deleteCluster() {
  local token=$1
  local node_url='http://graphdb-node-0.graphdb-node:7200'
  local reply_file='response.json'

  # Node 0 has to answer REST calls before the delete request is sent.
  waitService "${node_url}/rest/repositories" "$token"
  curl -o "$reply_file" -isSL -m 15 -X DELETE \
    --header "Authorization: Basic ${token}" \
    --header 'Accept: */*' \
    "${node_url}/rest/cluster/config?force=false"

  # -i stores the response headers in the file, so the status line can be grepped.
  if grep -q 'HTTP/1.1 200' "$reply_file"; then
    echo "Cluster deletion successful!"
    return
  fi
  if grep -q 'Node is not part of the cluster.\|HTTP/1.1 412' "$reply_file"; then
    echo "No cluster present."
    return
  fi

  echo "Cluster deletion failed, received response:"
  cat "$reply_file"
  echo
  exit 1
}

#######################################
# Prints how many cluster nodes node 0 currently reports.
# The count is the number of occurrences of "graphdb-node-" in the response of
# GET /rest/cluster/config, so 0 is printed when the response names no nodes.
# Arguments:
#   $1 - value for the "Authorization: Basic" header
# Outputs:
#   The node count on stdout; the raw response is stored in
#   clusterResponse.json in the current directory.
# Returns:
#   0 once a count has been printed, including a count of 0.
#######################################
function getNodeCountInCurrentCluster {
  local authToken=$1
  local node_address=http://graphdb-node-0.graphdb-node:7200
  local count

  waitService "${node_address}/rest/repositories" "$authToken"
  curl -o clusterResponse.json -isSL -m 15 -X GET \
    --header 'Content-Type: application/json' \
    --header "Authorization: Basic ${authToken}" \
    --header 'Accept: */*' \
    "${node_address}/rest/cluster/config"

  # grep -c still prints 0 when nothing matches but exits with status 1; a
  # count of 0 is a valid answer, so that status must not become this
  # function's return value (it would abort set -e callers).
  count=$(grep -o 'graphdb-node-' "clusterResponse.json" | grep -c "") || true
  echo "$count"
}

#######################################
# Polls an HTTP address until curl succeeds (--fail: non-error HTTP status).
# Arguments:
#   $1 - address to poll
#   $2 - value for the "Authorization: Basic" header
# Outputs:
#   Nothing on stdout, so callers may run this inside a command substitution
#   without the result being polluted; diagnostics go to stderr.
# Returns:
#   0 once the address answers; exits the script with status 1 when the
#   address still fails after the retry limit, sleeping 5 seconds between tries.
#######################################
function waitService {
  local address=$1
  local authToken=$2

  local attempt_counter=0
  local -r max_attempts=100

  # curl is invoked directly: its exit status drives the loop and nothing it
  # prints can be executed as a command.
  until curl --output /dev/null -fsSL -m 5 -H "Authorization: Basic ${authToken}" "${address}"; do
    if [[ ${attempt_counter} -eq ${max_attempts} ]]; then
      echo "Max attempts reached" >&2
      exit 1
    fi
    attempt_counter=$((attempt_counter + 1))
    sleep 5
  done
}

# Dispatch: run the function named by the first argument, passing the remaining
# arguments to it.
"$@"
2 changes: 1 addition & 1 deletion templates/graphdb-node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ metadata:
app: graphdb-node
{{- include "graphdb.labels" . | nindent 4 }}
spec:
clusterIP: None
selector:
app: graphdb-node
ports:
Expand All @@ -210,5 +211,4 @@ spec:
port: 7300
targetPort: 7300
protocol: TCP
clusterIP: None
{{- end }}
6 changes: 6 additions & 0 deletions templates/graphdb-utils-configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ metadata:
labels:
name: graphdb-utils-configmap
{{- include "graphdb.labels" . | nindent 4 }}
annotations:
"helm.sh/hook": pre-install, pre-upgrade, pre-rollback, post-install, post-upgrade, post-rollback
"helm.sh/hook-delete-policy": before-hook-creation, hook-succeeded, hook-failed
"helm.sh/hook-weight": "-10"
data:
graphdb.sh: |-
{{ tpl (.Files.Get "files/scripts/graphdb.sh" | indent 4) . }}
update-cluster.sh: |-
{{ tpl (.Files.Get "files/scripts/update-cluster.sh" | indent 4) . }}
45 changes: 45 additions & 0 deletions templates/jobs/patch-cluster-job.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
{{- if gt (int .Values.graphdb.clusterConfig.nodesCount) 1 }}
{{- $authToken := printf "%s:%s" .Values.graphdb.security.provisioningUsername .Values.graphdb.security.provisioningPassword | b64enc }}
# Helm hook Job that patches the configuration of an existing cluster.
# Rendered only when graphdb.clusterConfig.nodesCount is greater than 1.
# $authToken is the base64 encoding of "<provisioningUsername>:<provisioningPassword>".
apiVersion: {{ $.Values.versions.job }}
kind: Job
metadata:
name: patch-cluster-job
labels:
{{- include "graphdb.labels" . | nindent 4 }}
annotations:
# Runs after an upgrade or rollback; the Job is deleted before a new hook run
# and after it succeeds or fails.
"helm.sh/hook": post-upgrade, post-rollback
"helm.sh/hook-delete-policy": before-hook-creation, hook-succeeded, hook-failed
"helm.sh/hook-weight": "2"
spec:
ttlSecondsAfterFinished: 300
template:
spec:
imagePullSecrets:
{{- include "combinedImagePullSecrets" $ | nindent 8 }}
containers:
- name: patch-cluster
image: {{ include "renderFullImageName" (dict "globalRegistry" $.Values.global.imageRegistry "image" $.Values.images.graphdb) }}
securityContext:
allowPrivilegeEscalation: false
runAsUser: 0
volumeMounts:
- name: graphdb-utils
mountPath: /tmp/utils
- name: cluster-config
mountPath: /tmp/cluster-config
# Copies the cluster config and update-cluster.sh to /usr/local/bin, makes the
# script executable and calls its patchCluster function with the config path
# and the auth token; the script's stdout is appended to /proc/1/fd/1.
command: ['sh','-c']
args:
- |
cp /tmp/cluster-config/cluster-config.json /usr/local/bin/cluster-config.json
cp /tmp/utils/update-cluster.sh /usr/local/bin/update-cluster.sh; chmod +x /usr/local/bin/update-cluster.sh
/usr/local/bin/update-cluster.sh patchCluster "/usr/local/bin/cluster-config.json" "{{ $authToken }}" >> /proc/1/fd/1
restartPolicy: Never
volumes:
- name: cluster-config
configMap:
name: graphdb-cluster-config-configmap
- name: graphdb-utils
configMap:
name: graphdb-utils-configmap
backoffLimit: 4
{{- end }}
4 changes: 2 additions & 2 deletions templates/jobs/post-start-job.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{{- if gt (int .Values.graphdb.clusterConfig.nodesCount) 1 }}
#Set auth token var
{{- $authToken := printf "%s:%s" .Values.graphdb.security.provisioningUsername .Values.graphdb.security.provisioningPassword | b64enc }}
apiVersion: batch/v1
apiVersion: {{ $.Values.versions.job }}
kind: Job
metadata:
name: create-graphdb-cluster-job
Expand All @@ -10,6 +10,7 @@ metadata:
annotations:
"helm.sh/hook": post-install, post-upgrade, post-rollback
"helm.sh/hook-delete-policy": before-hook-creation, hook-succeeded, hook-failed
"helm.sh/hook-weight": "-1"
spec:
ttlSecondsAfterFinished: 300
template:
Expand Down Expand Up @@ -43,4 +44,3 @@ spec:
name: graphdb-utils-configmap
backoffLimit: 9
{{- end }}

37 changes: 37 additions & 0 deletions templates/jobs/scale-down-cluster-job.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Helm hook Job that scales the cluster down before an upgrade or rollback.
# It is rendered for every nodesCount value; update-cluster.sh removeNodes
# decides at run time whether nodes have to be removed or the cluster deleted.
# $authToken is the base64 encoding of "<provisioningUsername>:<provisioningPassword>".
{{- $authToken := printf "%s:%s" .Values.graphdb.security.provisioningUsername .Values.graphdb.security.provisioningPassword | b64enc }}
apiVersion: {{ $.Values.versions.job }}
kind: Job
metadata:
name: scale-down-cluster-job
labels:
{{- include "graphdb.labels" . | nindent 4 }}
annotations:
# Runs before an upgrade or rollback; the Job is deleted before a new hook run
# and after it succeeds or fails.
"helm.sh/hook": pre-upgrade, pre-rollback
"helm.sh/hook-delete-policy": before-hook-creation, hook-succeeded, hook-failed
spec:
ttlSecondsAfterFinished: 300
template:
spec:
imagePullSecrets:
{{- include "combinedImagePullSecrets" $ | nindent 8 }}
containers:
- name: scale-down-cluster
image: {{ include "renderFullImageName" (dict "globalRegistry" $.Values.global.imageRegistry "image" $.Values.images.graphdb) }}
securityContext:
allowPrivilegeEscalation: false
runAsUser: 0
volumeMounts:
- name: graphdb-utils
mountPath: /tmp/utils
# Copies update-cluster.sh to /usr/local/bin, makes it executable and calls its
# removeNodes function with the target node count, the auth token and the
# release namespace; the script's stdout is appended to /proc/1/fd/1.
command: ['sh','-c']
args:
- |
cp /tmp/utils/update-cluster.sh /usr/local/bin/update-cluster.sh; chmod +x /usr/local/bin/update-cluster.sh
/usr/local/bin/update-cluster.sh removeNodes {{ .Values.graphdb.clusterConfig.nodesCount }} "{{ $authToken }}" {{ $.Release.Namespace }} >> /proc/1/fd/1
restartPolicy: Never
volumes:
- name: graphdb-utils
configMap:
name: graphdb-utils-configmap
backoffLimit: 4

Loading

0 comments on commit c63660a

Please sign in to comment.