diff --git a/dysnix/bsc/Chart.yaml b/dysnix/bsc/Chart.yaml
index 1b80df0f..122b03bc 100644
--- a/dysnix/bsc/Chart.yaml
+++ b/dysnix/bsc/Chart.yaml
@@ -1,7 +1,7 @@
 apiVersion: v2
 name: bsc
 description: Binance Smart Chain chart for Kubernetes
-version: 0.6.38
+version: 0.6.39
 appVersion: 1.2.15
 
 keywords:
diff --git a/dysnix/bsc/templates/scripts/_init_from_gcs.tpl b/dysnix/bsc/templates/scripts/_init_from_gcs.tpl
index 402b20e4..0fdce0cd 100644
--- a/dysnix/bsc/templates/scripts/_init_from_gcs.tpl
+++ b/dysnix/bsc/templates/scripts/_init_from_gcs.tpl
@@ -7,20 +7,25 @@ set -ex
 # -e exits on error
 # AWS_SECRET_ACCESS_KEY
 DATA_DIR="{{ .Values.bsc.base_path }}"
-CHAINDATA_DIR="${DATA_DIR}/geth/chaindata"
+GETH_DIR="${DATA_DIR}/geth"
+CHAINDATA_DIR="${GETH_DIR}/chaindata"
+STATE_TMP_DIR="${GETH_DIR}/state_tmp"
+ANCIENT_TMP_DIR="${GETH_DIR}/ancient_tmp"
 INITIALIZED_FILE="${DATA_DIR}/.initialized"
 #without gs:// or s3://, just a bucket name and path
 INDEX_URL="{{ .Values.bsc.initFromGCS.indexUrl }}"
 GCS_BASE_URL="{{ .Values.bsc.initFromGCS.baseUrlOverride }}"
 S5CMD=/s5cmd
-EXCLUDE_ANCIENT="--exclude *.cidx --exclude *.ridx --exclude *.cdat --exclude *.rdat"
-EXCLUDE_STATE="--exclude *.ldb --exclude *.sst"
 INDEX="index"
 S_UPDATING="/updating"
 S_TIMESTAMP="/timestamp"
 S_STATE_URL="/state_url"
 S_ANCIENT_URL="/ancient_url"
 S_STATS="/stats"
+MAX_USED_SPACE_PERCENT={{ .Values.bsc.initFromGCS.maxUsedSpacePercent }}
+
+# allow container interrupt
+trap "{ exit 1; }" INT TERM
 
 {{- if .Values.bsc.forceInitFromSnapshot }}
 rm -f "${INITIALIZED_FILE}"
@@ -66,14 +71,10 @@ STATS_URL="${GCS_BASE_URL}${S_STATS}"
 
 STATE_URL="${GCS_BASE_URL}${S_STATE_URL}"
 ANCIENT_URL="${GCS_BASE_URL}${S_ANCIENT_URL}"
-
 STATE_SRC="$(${S5CMD} cat s3://${STATE_URL})"
 ANCIENT_SRC="$(${S5CMD} cat s3://${ANCIENT_URL})"
 REMOTE_STATS="$(${S5CMD} cat s3://${STATS_URL})"
 
-# create dst dirs
-mkdir -p "${CHAINDATA_DIR}/ancient"
-
 # save sync source
 echo "${GCS_BASE_URL}" > "${DATA_DIR}/source"
 
@@ -97,45 +98,77 @@ TIMESTAMP_0="$(${S5CMD} cat s3://${TIMESTAMP_URL})"
 
 # we're ready to perform actual data sync
-# we're done when both are true
+# we're done when all are true
 # 1) start and stop timestamps did not changed during data sync - no process started or finished updating the cloud
-# 2) 0 objects copied
+# 2) start timestamp is before stop timestamp - no process is in progress updating the cloud
+# 3) 0 objects copied
 SYNC=2
 CLEANUP=1
 while [ "${SYNC}" -gt 0 ] ; do
     # Cleanup
     if [ ${CLEANUP} -eq 1 ];then
-        echo "$(date -Iseconds) Cleaning up local dir ..."
-        mkdir -p ${DATA_DIR}/geth
-        mv ${DATA_DIR}/geth ${DATA_DIR}/geth.old && rm -rf ${DATA_DIR}/geth.old &
+        echo "$(date -Iseconds) Cleaning up local dir ${GETH_DIR} ..."
+        mkdir -p "${GETH_DIR}"
+        mv "${GETH_DIR}" "${GETH_DIR}.old" && rm -rf "${GETH_DIR}.old" &
         CLEANUP=0
     fi
 
-    # sync from cloud to local disk, without removing existing [missing in the cloud] files
+    # sync from cloud to local disk, removing existing files [missing in the cloud]
     # run multiple syncs in background
-    # we don't wanna sync ancient data here
-    time ${S5CMD} sync ${EXCLUDE_ANCIENT} s3://${STATE_SRC}/* ${CHAINDATA_DIR}/ > cplist_state.txt &
+    time ${S5CMD} sync --delete s3://${STATE_SRC}/* ${STATE_TMP_DIR}/ > cplist_state.txt &
     STATE_CP_PID=$!
-    time nice ${S5CMD} sync --delete --part-size 200 --concurrency 2 ${EXCLUDE_STATE} s3://${ANCIENT_SRC}/* ${CHAINDATA_DIR}/ancient/ > cplist_ancient.txt &
+    time nice ${S5CMD} sync --delete --part-size 200 --concurrency 2 s3://${ANCIENT_SRC}/* ${ANCIENT_TMP_DIR}/ > cplist_ancient.txt &
     ANCIENT_CP_PID=$!
 
     # wait for all syncs to complete
-    # TODO any errors handling here?
-    wait ${STATE_CP_PID} ${ANCIENT_CP_PID}
-
+    # shell tracks all sub-processes and stores exit codes internally
+    # it's not required to stay in wait state for all background processes at the same time
+    # we'll handle these processes sequentially
+    wait ${STATE_CP_PID}
+    STATE_CP_EXIT_CODE=$?
+    wait ${ANCIENT_CP_PID}
+    ANCIENT_CP_EXIT_CODE=$?
+
+    # let's handle running out of disk space specially: we don't retry, we just stay stuck here if disk usage is too high
+    VOLUME_USAGE_PERCENT=$(df "${DATA_DIR}" | tail -n 1 | awk '{print $5}'|tr -d %)
+    if [ "${VOLUME_USAGE_PERCENT}" -gt "${MAX_USED_SPACE_PERCENT}" ];then
+        set +x
+        # stop monitoring
+        if [ ${MON_PID} -ne 0 ];then kill ${MON_PID};MON_PID=0; fi
+        echo "We're out of disk space. Stuck here, nothing we can do. Check the source snapshot size"
+        echo "Source snapshot size ${REMOTE_STATS}"
+        echo "Disk usage is ${VOLUME_USAGE_PERCENT}%"
+        df -P -BG "${DATA_DIR}"
+        # we need to sleep inside a loop to handle pod termination w/o delays
+        # infinite sleep loop
+        while true; do sleep 10;done
+        # never reached
+    fi
+    # s5cmd uses 0 for success and 1 for any errors
+    # no errors - we're good to go
+    # any errors - retry the download
+    # all the exit codes have to be 0
+    if [ "${STATE_CP_EXIT_CODE}" -ne "0" ] || [ "${ANCIENT_CP_EXIT_CODE}" -ne "0" ];then
+        echo "s5cmd sync returned non-zero, retrying sync after a short sleep"
+        # wait some time to not spam with billable requests too frequently
+        sleep 60
+        SYNC=2
+        continue
+    fi
 
     # get start and stop timestamps from the cloud after sync
     UPDATING_1="$(${S5CMD} cat s3://${UPDATING_URL})"
     TIMESTAMP_1="$(${S5CMD} cat s3://${TIMESTAMP_URL})"
 
     # compare timestamps before and after sync
-    if [ "${UPDATING_0}" -eq "${UPDATING_1}" ] && [ "${TIMESTAMP_0}" -eq "${TIMESTAMP_1}" ];then
-        echo "Timestamps are equal"
+    # ensuring start timestamp is earlier than stop timestamp
+    if [ "${UPDATING_0}" -eq "${UPDATING_1}" ] && [ "${TIMESTAMP_0}" -eq "${TIMESTAMP_1}" ] && [ "${TIMESTAMP_1}" -gt "${UPDATING_1}" ] ;then
+        echo "Timestamps did not change and start timestamp is before stop timestamp"
         echo -e "U_0=${UPDATING_0}\tU_1=${UPDATING_1},\tT_0=${TIMESTAMP_0}\tT_1=${TIMESTAMP_1}"
         let SYNC=SYNC-1
     else
-        echo "Timestamps changed, running sync again ..."
+        echo "Source timestamps changed or start timestamp is after stop timestamp, running sync again ..."
echo -e "U_0=${UPDATING_0}\tU_1=${UPDATING_1},\tT_0=${TIMESTAMP_0}\tT_1=${TIMESTAMP_1}" # end timestamps -> begin timestamps UPDATING_0=${UPDATING_1} @@ -148,7 +181,7 @@ while [ "${SYNC}" -gt 0 ] ; do continue fi - # stop monitoring + # stop monitoring, we don't expect massive data copying if [ ${MON_PID} -ne 0 ];then kill ${MON_PID} MON_PID=0 @@ -168,5 +201,10 @@ while [ "${SYNC}" -gt 0 ] ; do fi done +# prepare geth datadir from tmp dirs +mv "${STATE_TMP_DIR}" "${CHAINDATA_DIR}" +rm -rf "${CHAINDATA_DIR}/ancient" +mv "${ANCIENT_TMP_DIR}" "${CHAINDATA_DIR}/ancient" + # Mark data dir as initialized touch ${INITIALIZED_FILE} diff --git a/dysnix/bsc/values.yaml b/dysnix/bsc/values.yaml index 514a5cc3..8a33a34d 100644 --- a/dysnix/bsc/values.yaml +++ b/dysnix/bsc/values.yaml @@ -148,6 +148,7 @@ bsc: indexUrl: "bucket/path/to/file" baseUrlOverride: "" # "bucket/path/to/dir" fullResyncOnSrcUpdate: false + maxUsedSpacePercent: 93 # percents syncToGCS: enabled: false image: peakcom/s5cmd:v2.2.2