Skip to content

Commit

Permalink
fix: add 2 args and fix some bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
linkdesu committed Jul 9, 2024
1 parent 3251924 commit f94be69
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 118 deletions.
99 changes: 2 additions & 97 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,103 +6,8 @@ yarn-debug.log*
yarn-error.log*
lerna-debug.log*

# Diagnostic reports (https://nodejs.org/api/report.html)
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json

# Runtime data
pids
*.pid
*.seed
*.pid.lock

# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov

# Coverage directory used by tools like istanbul
coverage
*.lcov

# nyc test coverage
.nyc_output

# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
.grunt

# Bower dependency directory (https://bower.io/)
bower_components

# node-waf configuration
.lock-wscript

# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release

# Dependency directories
node_modules/
jspm_packages/

# TypeScript v1 declaration files
typings/

# TypeScript cache
*.tsbuildinfo

# Optional npm cache directory
.npm

# Optional eslint cache
.eslintcache

# Microbundle cache
.rpt2_cache/
.rts2_cache_cjs/
.rts2_cache_es/
.rts2_cache_umd/

# Optional REPL history
.node_repl_history

# Output of 'npm pack'
*.tgz

# Yarn Integrity file
.yarn-integrity

# dotenv environment variables file
.env
.env.test

# parcel-bundler cache (https://parceljs.org/)
.cache

# Next.js build output
.next

# Nuxt.js build / generate output
.nuxt
dist

# Gatsby files
.cache/
# Comment in the public line in if your project uses Gatsby and *not* Next.js
# https://nextjs.org/blog/next-9-1#public-directory-support
# public

# vuepress build output
.vuepress/dist

# Serverless directories
.serverless/

# FuseBox cache
.fusebox/

# DynamoDB Local files
.dynamodb/

# TernJS port file
.tern-port

dist/
.idea/

config/local*
*.json
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ Because of unstable network, sometimes the CKB node may disconnect from peers an
```bash
pm2 start --cron-restart="* * * * *" \
./scripts/ckb-node-monit.mjs -- \
--block-timeout 180 \
--log /path_to_ckb_node_log_dir/run.log \
--data ./ckb-node-status.json
```
Expand Down
63 changes: 42 additions & 21 deletions scripts/ckb-node-monit.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,22 @@ import fs from 'fs'
import readline from 'readline'
import { spawn } from 'child_process'

const newBlockTimeout = 180 * 1000; // 3 minutes
const restartTimeout = 300 * 1000; // 5 minutes
const DEFAULT_BLOCK_TIMEOUT = 180; // 3 minutes
const DEFAULT_RESTART_TIMEOUT = 300; // 5 minutes

(async () => {
const { logFilePath, dataFilePath } = parseArgs();
const { logFilePath, dataFilePath, blockTimeout, restartTimeout } = parseArgs();
const previousStatus = readStatusFromFile(dataFilePath);

const now = new Date();
const timePassed = now - new Date(previousStatus.timestamp);
const lastHeightPassed = now - new Date(previousStatus.lastHeightAt);
const lastRestartPassed = now - new Date(previousStatus.lastRestartAt);
const latesHeight = await getLatestHeight(logFilePath);
const latestHeight = await getLatestHeight(logFilePath);

if (latesHeight === previousStatus.latestHeight) {
// If no new block for 3 minutes and the node has not been restarted for 5 minutes, try restart it again.
if (timePassed > newBlockTimeout && lastRestartPassed > restartTimeout) {
log('warning', `The CKB node has stayed at height ${latesHeight} for more than 3 minutes, restarting it ...`);
if (latestHeight === previousStatus.latestHeight) {
// If no new block has arrived within the block timeout and the last restart happened longer ago than the restart timeout, restart the node again.
if (lastHeightPassed > blockTimeout && lastRestartPassed > restartTimeout) {
log('warning', `The CKB node has stayed at height ${latestHeight} for more than ${Math.round(lastHeightPassed / 60) / 1000} minutes, restarting it ...`);

restartCKBNode();

Expand All @@ -36,9 +36,9 @@ const restartTimeout = 300 * 1000; // 5 minutes
}
} else {
// If new block is found, update the status.
log('info', `Record height ${latesHeight} into ${dataFilePath} ...`);
log('info', `Record height ${latestHeight} into ${dataFilePath} ...`);

const newStatus = { ...previousStatus, latesHeight, timestamp: now.toISOString() };
const newStatus = { ...previousStatus, latestHeight: latestHeight, lastHeightAt: now.toISOString() };
writeStatusToFile(dataFilePath, newStatus);
}
})();
Expand All @@ -55,52 +55,73 @@ async function getLatestHeight (logFilePath) {
lines.push(line);
}

let latesHeight = null;
let latestHeight = null;
const regex = /^[0-9-:\s.+]+ ChainService INFO ckb_chain::chain\s+block: (\d+)/;

for (const line of lines) {
const match = line.match(regex);
if (match) {
latesHeight = parseInt(match[1], 10);
latestHeight = parseInt(match[1], 10);
}
}

return latesHeight;
return latestHeight;
}

/**
 * Parse the command line options of the monitor script.
 *
 * Supported options (long / short form):
 *   --log, -l              path of the CKB node log file (required)
 *   --data, -d             path of the status file (default: 'ckb-node-status.json')
 *   --block-timeout, -b    seconds without a new block before a restart is considered
 *   --restart-timeout, -r  minimum seconds between two restarts
 *
 * Exits the process with code 1 when --log is missing or a timeout is not a number.
 *
 * @returns {{ logFilePath: string, dataFilePath: string, blockTimeout: number, restartTimeout: number }}
 *   The parsed options; both timeouts are returned in milliseconds.
 */
function parseArgs () {
  const args = process.argv.slice(2);

  // Index of the long flag when present, otherwise of the short flag (-1 when neither is given).
  const findFlag = (longFlag, shortFlag) => {
    const longIndex = args.indexOf(longFlag);
    return longIndex !== -1 ? longIndex : args.indexOf(shortFlag);
  };

  // Read a timeout option given in seconds and convert it to milliseconds.
  // The caller compares the result with Date differences, which are in
  // milliseconds, so user-supplied values need the same conversion as the defaults.
  const readTimeout = (longFlag, shortFlag, defaultSeconds) => {
    const flagIndex = findFlag(longFlag, shortFlag);
    if (flagIndex === -1 || flagIndex + 1 >= args.length) {
      return defaultSeconds * 1000;
    }

    const seconds = Number.parseInt(args[flagIndex + 1], 10);
    if (Number.isNaN(seconds)) {
      log('error', `Invalid value for ${longFlag}. Must be a number.`);
      process.exit(1);
    }

    return seconds * 1000;
  };

  const logFilePathIndex = findFlag('--log', '-l');
  const dataFilePathIndex = findFlag('--data', '-d');

  if (logFilePathIndex === -1 || logFilePathIndex + 1 >= args.length) {
    log('error', 'Usage: node script.js --log <path-to-logfile> --data <path-to-datafile> --block-timeout [block-timeout] --restart-timeout [restart-timeout]');
    process.exit(1);
  }

  const logFilePath = args[logFilePathIndex + 1];
  const dataFilePath = dataFilePathIndex !== -1 && dataFilePathIndex + 1 < args.length
    ? args[dataFilePathIndex + 1]
    : 'ckb-node-status.json';

  const blockTimeout = readTimeout('--block-timeout', '-b', DEFAULT_BLOCK_TIMEOUT);
  const restartTimeout = readTimeout('--restart-timeout', '-r', DEFAULT_RESTART_TIMEOUT);

  return { logFilePath, dataFilePath, blockTimeout, restartTimeout };
}

/**
 * Load the status persisted by a previous run of the monitor.
 *
 * A missing, unreadable or malformed status file is treated as "no previous
 * status" on purpose, so the first run (or a corrupted file) does not abort
 * the monitor.
 *
 * @param {string} dataFilePath - Path of the JSON status file.
 * @returns {{ latestHeight: (number|null), lastHeightAt: (string|null), lastRestartAt: * }}
 *   The stored status; every field that is not available is `null`.
 */
function readStatusFromFile (dataFilePath) {
  const emptyStatus = () => ({ latestHeight: null, lastHeightAt: null, lastRestartAt: null });

  if (!fs.existsSync(dataFilePath)) {
    return emptyStatus();
  }

  try {
    const status = JSON.parse(fs.readFileSync(dataFilePath, 'utf8'));

    // Normalize absent fields to null: the caller passes them to `new Date()`,
    // and `new Date(undefined)` is an Invalid Date whose comparisons are always false.
    return {
      latestHeight: status.latestHeight ?? null,
      // Status files written before the field was renamed stored it as `timestamp`.
      lastHeightAt: status.lastHeightAt ?? status.timestamp ?? null,
      lastRestartAt: status.lastRestartAt ?? null,
    };
  } catch (_) {
    // Best effort: fall back to an empty status instead of crashing.
    return emptyStatus();
  }
}

function writeStatusToFile (dataFilePath, status) {
const fieldsToWrite = {
latestHeight: status.latesHeight,
timestamp: status.timestamp ?? new Date().toISOString(),
latestHeight: status.latestHeight,
lastHeightAt: status.lastHeightAt,
lastRestartAt: status.lastRestartAt,
};

Expand Down

0 comments on commit f94be69

Please sign in to comment.