Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[geth] add upload/download datadir snapshot from S3 #266

Merged
merged 5 commits into from
Oct 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dysnix/geth/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ apiVersion: v2
name: geth
description: Go-ethereum blockchain node Helm Chart

version: 1.0.10
version: 1.0.11
appVersion: v1.13.2

keywords:
Expand Down
10 changes: 10 additions & 0 deletions dysnix/geth/templates/configmap-scripts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,13 @@ metadata:
data:
check-readiness.sh: |-
{{- include (print $.Template.BasePath "/scripts/_check-readiness.tpl") . | nindent 4 }}
{{- if or .Values.syncToS3.enabled .Values.initFromS3.enabled }}
  # S3 helper scripts, shipped when either snapshot upload (syncToS3) or
  # snapshot download (initFromS3) is enabled
  init-from-s3.sh: |-
    {{- include (print $.Template.BasePath "/scripts/_init-from-s3.tpl") . | nindent 4 }}
  sync-to-s3.sh: |-
    {{- include (print $.Template.BasePath "/scripts/_sync-to-s3.tpl") . | nindent 4 }}
  s3-env.sh: |-
    {{- include (print $.Template.BasePath "/scripts/_s3-env.tpl") . | nindent 4 }}
  s3-cron.sh: |-
    {{- include (print $.Template.BasePath "/scripts/_s3-cron.tpl") . | nindent 4 }}
  {{- end }}
31 changes: 31 additions & 0 deletions dysnix/geth/templates/rbac.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{{- if .Values.syncToS3.enabled }}
# Rendered only when syncToS3 is enabled.
# Role: get/patch on the single ConfigMap "<fullname>-s3-config".
# NOTE(review): presumably needed so the pod can run "s3-cron.sh disable_sync"
# and patch SYNC_TO_S3 back to "False" - confirm against the StatefulSet.
{{- $fullName := include "geth.fullname" . }}
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: {{ $fullName }}
labels: {{ include "geth.labels" . | nindent 4 }}
rules:
- apiGroups: [""]
resources:
- configmaps
# access is restricted to this one ConfigMap by name
resourceNames:
- {{ $fullName }}-s3-config
verbs:
- get
- patch
---
# RoleBinding: grants the Role above to the ServiceAccount that carries the
# chart fullname, in the release namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: {{ $fullName }}
labels: {{ include "geth.labels" . | nindent 4 }}
subjects:
- kind: ServiceAccount
name: {{ $fullName }}
namespace: {{ .Release.Namespace }}
roleRef:
kind: Role
name: {{ $fullName }}
apiGroup: rbac.authorization.k8s.io
{{- end }}
13 changes: 13 additions & 0 deletions dysnix/geth/templates/s3-configmap.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{{- if or .Values.initFromS3.enabled .Values.syncToS3.enabled }}
# Non-secret S3 settings, rendered when snapshot download or upload is enabled.
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ include "geth.fullname" . }}-s3-config
data:
DATA_DIR: /root/.ethereum
# starts disabled; s3-cron.sh patches this key to "True" (enable_sync)
# or "False" (disable_sync)
SYNC_TO_S3: "False"
# the three URLs are passed through tpl, so the values may contain template expressions
S3_BASE_URL: {{ tpl .Values.s3config.baseUrl . }}
S3_CHAINDATA_URL: {{ tpl .Values.s3config.chaindataUrl . }}
S3_ANCIENT_URL: {{ tpl .Values.s3config.ancientUrl . }}
# "True" makes init-from-s3.sh drop its initialized marker and download again
FORCE_INIT: {{ ternary "True" "False" .Values.initFromS3.force | quote }}
{{- end }}
45 changes: 45 additions & 0 deletions dysnix/geth/templates/s3-cronjob-rbac.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
{{- if .Values.syncToS3.cronjob.enabled -}}
# ServiceAccount, Role and RoleBinding for the sync-to-s3 CronJob, all named
# "<fullname>-s3-cronjob". Rendered only when syncToS3.cronjob is enabled.
{{- $fullName := print (include "geth.fullname" .) "-s3-cronjob" }}
apiVersion: v1
kind: ServiceAccount
metadata:
name: {{ $fullName }}
labels: {{ include "geth.labels" . | nindent 4 }}
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: {{ $fullName }}
labels: {{ include "geth.labels" . | nindent 4 }}
rules:
# s3-cron.sh runs "kubectl wait", "kubectl get" and "kubectl delete" on the pod
- apiGroups: [""]
resources:
- pods
verbs:
- get
- list
- watch
- delete
# s3-cron.sh patches SYNC_TO_S3 in this single ConfigMap
- apiGroups: [""]
resources:
- configmaps
resourceNames:
- {{ include "geth.fullname" . }}-s3-config
verbs:
- get
- patch
---
# binds the Role above to the CronJob's ServiceAccount in the release namespace
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: {{ $fullName }}
labels: {{ include "geth.labels" . | nindent 4 }}
subjects:
- kind: ServiceAccount
name: {{ $fullName }}
namespace: {{ .Release.Namespace }}
roleRef:
kind: Role
name: {{ $fullName }}
apiGroup: rbac.authorization.k8s.io
{{- end }}
71 changes: 71 additions & 0 deletions dysnix/geth/templates/s3-cronjob.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
{{- if .Values.syncToS3.cronjob.enabled }}
# CronJob that runs "s3-cron.sh enable_sync" on a schedule: the script patches
# SYNC_TO_S3 to "True" in the S3 ConfigMap and deletes pod "<fullname>-0".
apiVersion: batch/v1
kind: CronJob
metadata:
name: {{ include "geth.fullname" . }}-sync-to-s3
labels:
{{- include "geth.labels" . | nindent 4 }}
spec:
# inside this "with", "." is .Values.syncToS3.cronjob; "$" is still the root context
{{- with .Values.syncToS3.cronjob }}
schedule: "{{ .schedule }}"
# never start a new run while the previous one is still active
concurrencyPolicy: Forbid
startingDeadlineSeconds: 300
jobTemplate:
metadata:
name: {{ include "geth.fullname" $ }}-sync-to-s3
spec:
# the job only patches a ConfigMap and deletes a pod, so it is capped at 60s
# and is not retried by the Job controller
activeDeadlineSeconds: 60
backoffLimit: 0
template:
metadata:
labels:
{{- include "geth.labels" $ | nindent 12 }}
spec:
restartPolicy: OnFailure
{{- with .imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 12 }}
{{- end }}
# ServiceAccount created by s3-cronjob-rbac.yaml
serviceAccountName: {{ include "geth.fullname" $ }}-s3-cronjob
{{- with .podSecurityContext }}
securityContext:
{{- toYaml . | nindent 12 }}
{{- end }}
{{- with .nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 12 }}
{{- end }}
{{- with .affinity }}
affinity:
{{- toYaml . | nindent 12 }}
{{- end }}
{{- with .tolerations }}
tolerations:
{{- toYaml . | nindent 12 }}
{{- end }}
containers:
- name: enable-sync-to-s3
image: "{{ .image.repository }}:{{ .image.tag }}"
imagePullPolicy: {{ .image.pullPolicy | quote }}
{{- with .securityContext }}
securityContext:
{{- toYaml . | nindent 14 }}
{{- end }}
# s3-cron.sh arguments: $1 = mode, $2 = timeout handed to "kubectl wait"
command:
- /bin/sh
- /scripts/s3-cron.sh
- enable_sync
- 5s
volumeMounts:
- name: scripts
mountPath: /scripts
{{- with .resources }}
resources:
{{- toYaml . | nindent 14 }}
{{- end }}
# scripts come from the chart's "<fullname>-scripts" ConfigMap
volumes:
- name: scripts
configMap:
name: {{ template "geth.fullname" $ }}-scripts
{{- end }}
{{- end }}
10 changes: 10 additions & 0 deletions dysnix/geth/templates/s3-secret.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{{- if or .Values.initFromS3.enabled .Values.syncToS3.enabled }}
apiVersion: v1
kind: Secret
metadata:
name: {{ include "geth.fullname" . }}-s3-secret
data:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you think about using stringData instead of data? It may save a couple of bytes by removing `toString | b64enc`.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I left it in `data` because changes to secrets will then be visible in `helm diff`.

Copy link
Contributor

@voron voron Oct 27, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, `helm diff` doesn't display secret values unless the `--show-secrets` option is passed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using data instead of stringData I can capture this in helm diff output.

geth, geth-2-s3-secret, Secret (v1) has changed:
  apiVersion: v1
  kind: Secret
  metadata:
    name: geth-2-s3-secret
  data:
-   AWS_ACCESS_KEY_ID: '-------- # (61 bytes)'
+   AWS_ACCESS_KEY_ID: '++++++++ # (62 bytes)'
    AWS_SECRET_ACCESS_KEY: 'REDACTED # (40 bytes)'
    S3_ENDPOINT_URL: 'REDACTED # (30 bytes)'

I just need to see whether it is changed or not. Don't need secret values to be present in output.

When using stringData helm diff does not provide info whether secret has changed or not.

Copy link
Contributor

@voron voron Oct 27, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I cannot reproduce this using helm or helmfile and dysnix/raw chart with following values

templates:
  - |
    apiVersion: v1
    stringData:
      foo: newbar
    kind: Secret
    metadata:
      name: {{ .Release.Name }}
    type: Opaque

I change foo's value and see

-   foo: '-------- # (3 bytes)'
+   foo: '++++++++ # (6 bytes)'

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems the related issue was fixed about a year ago.

S3_ENDPOINT_URL: {{ .Values.s3config.endpointUrl | toString | b64enc }}
AWS_ACCESS_KEY_ID: {{ .Values.s3config.accessKeyId | toString | b64enc }}
AWS_SECRET_ACCESS_KEY: {{ .Values.s3config.secretAccessKey | toString | b64enc }}
{{- end }}
94 changes: 94 additions & 0 deletions dysnix/geth/templates/scripts/_init-from-s3.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#!/usr/bin/env sh
# shellcheck disable=SC2086,SC3037
# SC2086: unquoted expansions are used below (e.g. $EXCLUDE_ANCIENT, kill $i).
# SC3037: "echo -e" is used although POSIX sh leaves echo flags undefined.
#
# Downloads the chaindata/ancient snapshot from S3 into the local data
# directories unless the node is already marked as initialized.

# abort on the first command that fails
set -e

# shared settings; presumably defines S5CMD, the *_URL / *_DIR variables and
# INITIALIZED_FILE used below -- TODO confirm against _s3-env.tpl
. /scripts/s3-env.sh

# Validate preconditions before any download is attempted.
# Reads:  FORCE_INIT, INITIALIZED_FILE, S3_ENDPOINT_URL,
#         AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY
# Exits 0 when the marker file exists and no forced re-init was requested,
# exits 1 when any S3 setting is empty; otherwise returns to the caller.
process_inputs() {
  # a forced init drops the marker so the check below falls through
  case "$FORCE_INIT" in
    True)
      echo "Force init enabled, existing data will be deleted."
      rm -f "$INITIALIZED_FILE"
      ;;
  esac

  # marker present -> nothing left to do
  if [ -f "$INITIALIZED_FILE" ]; then
    echo "Blockchain already initialized. Exiting..."
    exit 0
  fi

  # endpoint and both keys must be non-empty
  if [ -n "$S3_ENDPOINT_URL" ] && [ -n "$AWS_ACCESS_KEY_ID" ] && [ -n "$AWS_SECRET_ACCESS_KEY" ]; then
    return 0
  fi
  echo "S3 credentials are not provided, exiting"
  exit 1
}

# Start or stop a background reporter that prints download progress.
# Arguments: $1 - "start" or "stop"; anything else prints "Unknown arg"
# Reads:     S5CMD, STATS_URL, DATA_DIR
# Writes:    progress_pid (PID of the reporter loop; reset to 0 on stop)
progress() {
  case $1 in
    "start")
      # remote totals are only shown by the reporter, so they are fetched
      # here and not on "stop"
      remote_stats=$("$S5CMD" cat "s3://${STATS_URL}")
      while true; do
        inodes=$(df -Phi "$DATA_DIR" | tail -n 1 | awk '{print $3}')
        size=$(df -P -BG "$DATA_DIR" | tail -n 1 | awk '{print $3}')G
        echo -e "$(date -Iseconds) | SOURCE TOTAL ${remote_stats} | DST USED Inodes:\t${inodes} Size:\t${size}"
        sleep 2
      done &
      progress_pid=$! ;;
    "stop")
      kill "$progress_pid"
      progress_pid=0 ;;
    # must stay unquoted: a quoted "*" only matches a literal asterisk
    *)
      echo "Unknown arg" ;;
  esac
}

# Refuse to download while a lockfile object is readable in the bucket.
# Reads: S5CMD, LOCKFILE_URL
# Exits 1 when "s5cmd cat" on the lockfile succeeds; returns 0 otherwise.
check_lockfile() {
  # the lockfile cannot be read -> safe to continue
  "$S5CMD" cat "s3://${LOCKFILE_URL}" >/dev/null 2>&1 || return 0

  echo "Found existing lockfile, snapshot might be corrupted. Aborting download.."
  exit 1
}

# Signal handler: stop every background task whose PID is passed in.
# Arguments: $@ - PIDs to terminate
interrupt() {
  echo "Got interrupt signal, stopping..."
  for pid in "$@"; do
    # a task may already have exited; under "set -e" a failing kill would
    # abort the handler and leave the remaining tasks running
    kill "$pid" 2>/dev/null || true
  done
}

# Download the chaindata and ancient snapshots from S3 into the local data
# directories, then mark the node as initialized.
# Reads:   S5CMD, CHAINDATA_URL, ANCIENT_URL, CHAINDATA_DIR, ANCIENT_DIR,
#          EXCLUDE_ANCIENT, EXCLUDE_CHAINDATA, INITIALIZED_FILE
# Returns: 0 when both downloads succeed; 1 (without creating
#          INITIALIZED_FILE) when either of them fails.
sync() {
  # cleanup data always, s5cmd does not support "true" sync, it does not save object's timestamps
  # https://github.com/peak/s5cmd/issues/532
  echo "Cleaning up local data..."
  rm -rf "$ANCIENT_DIR"
  rm -rf "$CHAINDATA_DIR"
  # recreate data directories
  mkdir -p "$CHAINDATA_DIR"
  mkdir -p "$ANCIENT_DIR"

  echo "Starting download data from S3..."
  progress start

  # run two jobs in parallel, one for chaindata, second for ancient data
  # $EXCLUDE_* stay unquoted so they expand to zero or more separate arguments
  time "$S5CMD" --stat sync $EXCLUDE_ANCIENT "s3://${CHAINDATA_URL}/*" "${CHAINDATA_DIR}/" >/dev/null &
  download_chaindata=$!
  time nice "$S5CMD" --stat sync --part-size 200 --concurrency 2 $EXCLUDE_CHAINDATA "s3://${ANCIENT_URL}/*" "${ANCIENT_DIR}/" >/dev/null &
  download_ancient=$!

  # handle interruption / termination
  trap 'interrupt ${download_chaindata} ${download_ancient} ${progress_pid}' INT TERM

  # "wait pid1 pid2" reports only the status of the last PID, which would hide
  # a failed chaindata download; wait for each job and keep both statuses
  chaindata_rc=0
  ancient_rc=0
  wait "$download_chaindata" || chaindata_rc=$?
  wait "$download_ancient" || ancient_rc=$?

  # best effort: the reporter may already be gone after an interrupt
  progress stop || true

  if [ "$chaindata_rc" -ne 0 ] || [ "$ancient_rc" -ne 0 ]; then
    echo "Download failed (chaindata: ${chaindata_rc}, ancient: ${ancient_rc}), not marking as initialized" >&2
    return 1
  fi

  # all done, mark as initialized
  touch "$INITIALIZED_FILE"
}


# Entry point: validate inputs, refuse to use a locked snapshot, then
# download it. Each step exits the script itself when it cannot continue.
main() {
  process_inputs
  check_lockfile
  sync
}

# run the init sequence; no arguments are passed, main reads only variables
main
81 changes: 81 additions & 0 deletions dysnix/geth/templates/scripts/_s3-cron.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#!/usr/bin/env sh
# shellcheck disable=SC1083
# SC1083: the literal {{ }} below are Helm template expressions, replaced with
# the chart fullname before this script is executed.
#
# Toggles the SYNC_TO_S3 key in the chart's S3 ConfigMap.
# Usage: s3-cron.sh <enable_sync|disable_sync> [wait-timeout]

# $1 - mode, $2 - timeout handed to "kubectl wait" (used by enable_sync only)
MODE="$1"
WAIT_TIMEOUT="$2"
CONFIGMAP_NAME={{ include "geth.fullname" . }}-s3-config
# "command -v" is the POSIX way to resolve a command; "which" is an external
# tool that is not guaranteed to exist in the image
KUBECTL=$(command -v kubectl)
# filled in by enable_sync / disable_sync, consumed by patch_configmap
PATCH_DATA=""
POD_NAME={{ include "geth.fullname" . }}-0

# Exit the script when an earlier command failed.
# Arguments: $1 - return code to inspect
#            $2 - message printed when $1 is non-zero
#            $3 - optional exit code used instead of $1
check_ret() {
  ret="$1"
  msg="$2"
  # a non-empty third argument overrides the exit code
  exit_code=${3:-${ret}}

  # success: hand control back to the caller
  [ "$ret" -ne 0 ] || return 0

  echo "$msg"
  echo "return code ${ret}, exit code ${exit_code}"
  exit "$exit_code"
}

# Make sure the target pod is Ready and is not being deleted.
# Arguments: $1 - pod name
# Reads:     KUBECTL, WAIT_TIMEOUT
# Exits 0 when the pod does not become Ready within WAIT_TIMEOUT,
# exits with kubectl's status when the pod cannot be fetched,
# exits 1 when the pod carries a deletionTimestamp.
check_pod_readiness() {
  echo "$(date -Iseconds) Waiting ${WAIT_TIMEOUT} for pod ${1} to become ready ..."
  # a pod that is not ready is not an error for the job, hence exit code 0
  "$KUBECTL" wait --timeout="$WAIT_TIMEOUT" --for=condition=Ready pod "$1" ||
    check_ret $? "$(date -Iseconds) Pod ${1} is not ready, nothing to do, exiting" 0

  # a Ready pod can still be terminating, so look at its deletionTimestamp
  # https://github.com/kubernetes/kubernetes/issues/22839
  echo "$(date -Iseconds) Checking for pod ${1} to not terminate ..."
  deletion_timestamp=$("$KUBECTL" get -o jsonpath='{.metadata.deletionTimestamp}' pod "$1") ||
    check_ret $? "$(date -Iseconds) Cannot get pod ${1}, abort"

  # an empty timestamp means the pod is not being deleted
  [ -z "$deletion_timestamp" ] ||
    check_ret $? "$(date -Iseconds) Pod ${1} is terminating now, try another time" 1
}

# Stage the merge patch that switches SYNC_TO_S3 on.
# Reads: CONFIGMAP_NAME   Writes: PATCH_DATA
enable_sync() {
  PATCH_DATA='{"data":{"SYNC_TO_S3":"True"}}'
  printf '%s Patching configmap %s to enable sync\n' "$(date -Iseconds)" "${CONFIGMAP_NAME}"
}

# Stage the merge patch that switches SYNC_TO_S3 off.
# Reads: CONFIGMAP_NAME   Writes: PATCH_DATA
disable_sync() {
  PATCH_DATA='{"data":{"SYNC_TO_S3":"False"}}'
  printf '%s Patching configmap %s to disable sync\n' "$(date -Iseconds)" "${CONFIGMAP_NAME}"
}

# Apply the staged PATCH_DATA to the S3 ConfigMap as a merge patch.
# Reads: KUBECTL, CONFIGMAP_NAME, PATCH_DATA
# Exits with kubectl's status when the patch fails.
patch_configmap() {
  "$KUBECTL" patch configmap "$CONFIGMAP_NAME" --type merge --patch "$PATCH_DATA" ||
    check_ret $? "$(date -Iseconds) Fatal: cannot patch configmap ${CONFIGMAP_NAME}, abort"
}

# Delete the given pod so that its init container runs again on restart.
# Arguments: $1 - pod name
# Reads:     KUBECTL
# Exits with kubectl's status when the delete request fails.
delete_pod() {
  echo "$(date -Iseconds) Deleting pod ${1} to trigger action inside init container ..."
  # --wait=false: return once the delete request is accepted
  "$KUBECTL" delete pod "$1" --wait=false ||
    check_ret $? "$(date -Iseconds) Fatal: cannot delete pod ${1}, abort"
  echo "$(date -Iseconds) Pod ${1} deleted successfully, exiting. Check pod logs after restart."
}

# Dispatch on MODE (first script argument).
#   enable_sync  - check the pod, set SYNC_TO_S3 to "True", delete the pod
#   disable_sync - set SYNC_TO_S3 back to "False"
# Any other value exits with code 1 and an error message.
main() {
  case "$MODE" in
    "enable_sync")
      check_pod_readiness "$POD_NAME"
      enable_sync
      patch_configmap
      delete_pod "$POD_NAME"
      ;;
    # intended to be run inside initContainer after successful sync
    "disable_sync")
      disable_sync
      patch_configmap
      ;;
    # must stay unquoted: a quoted "*" only matches a literal asterisk, which
    # would let an unknown mode fall through silently with exit code 0
    *)
      check_ret 1 "$(date -Iseconds) Mode value \"$MODE\" is incorrect, abort"
      ;;
  esac
}

# run the mode selected by MODE; main takes no arguments of its own
main
Loading
Loading