From b52f824c3ce8f1def66e6699429b003e02606e33 Mon Sep 17 00:00:00 2001 From: Douglas Gubert Date: Sun, 10 Nov 2024 15:39:07 -0300 Subject: [PATCH 1/5] fix: engine restart process for Deno used wrong status RocketChat/Rocket.Chat#33865 --- src/definition/metadata/AppMethod.ts | 2 + src/server/compiler/AppCompiler.ts | 2 +- src/server/managers/AppRuntimeManager.ts | 9 ++- .../runtime/deno/AppsEngineDenoRuntime.ts | 69 +++++++++++++++---- src/server/runtime/deno/LivenessManager.ts | 12 +++- .../DenoRuntimeSubprocessController.spec.ts | 23 +++++-- 6 files changed, 94 insertions(+), 23 deletions(-) diff --git a/src/definition/metadata/AppMethod.ts b/src/definition/metadata/AppMethod.ts index ffe80c4d6..007ef7d0c 100644 --- a/src/definition/metadata/AppMethod.ts +++ b/src/definition/metadata/AppMethod.ts @@ -98,4 +98,6 @@ export enum AppMethod { EXECUTE_POST_USER_LOGGED_IN = 'executePostUserLoggedIn', EXECUTE_POST_USER_LOGGED_OUT = 'executePostUserLoggedOut', EXECUTE_POST_USER_STATUS_CHANGED = 'executePostUserStatusChanged', + // Runtime specific methods + RUNTIME_RESTART = 'runtime:restart', } diff --git a/src/server/compiler/AppCompiler.ts b/src/server/compiler/AppCompiler.ts index d1f9ddf8a..33f937771 100644 --- a/src/server/compiler/AppCompiler.ts +++ b/src/server/compiler/AppCompiler.ts @@ -21,7 +21,7 @@ export class AppCompiler { throw new Error(`Invalid App package for "${storage.info.name}". Could not find the classFile (${storage.info.classFile}) file.`); } - const runtime = await manager.getRuntime().startRuntimeForApp(packageResult); + const runtime = await manager.getRuntime().startRuntimeForApp(packageResult, storage); const app = new ProxiedApp(manager, storage, runtime); diff --git a/src/server/managers/AppRuntimeManager.ts b/src/server/managers/AppRuntimeManager.ts index 64adf9b0a..6c63307a9 100644 --- a/src/server/managers/AppRuntimeManager.ts +++ b/src/server/managers/AppRuntimeManager.ts @@ -1,6 +1,7 @@ import type { AppManager } from '../AppManager'; import type { IParseAppPackageResult } from '../compiler'; import { DenoRuntimeSubprocessController } from '../runtime/deno/AppsEngineDenoRuntime'; +import type { IAppStorageItem } from '../storage'; export type AppRuntimeParams = { appId: string; @@ -21,14 +22,18 @@ export class AppRuntimeManager { constructor(private readonly manager: AppManager) {} - public async startRuntimeForApp(appPackage: IParseAppPackageResult, options = { force: false }): Promise { + public async startRuntimeForApp( + appPackage: IParseAppPackageResult, + storageItem: IAppStorageItem, + options = { force: false }, + ): Promise { const { id: appId } = appPackage.info; if (appId in this.subprocesses && !options.force) { throw new Error('App already has an associated runtime'); } - this.subprocesses[appId] = new DenoRuntimeSubprocessController(this.manager, appPackage); + this.subprocesses[appId] = new DenoRuntimeSubprocessController(this.manager, appPackage, storageItem); await this.subprocesses[appId].setupApp(); diff --git a/src/server/runtime/deno/AppsEngineDenoRuntime.ts b/src/server/runtime/deno/AppsEngineDenoRuntime.ts index d1f5b5588..e66281427 100644 --- a/src/server/runtime/deno/AppsEngineDenoRuntime.ts +++ b/src/server/runtime/deno/AppsEngineDenoRuntime.ts @@ -7,11 +7,11 @@ import debugFactory from 'debug'; import { decoder } from './codec'; import type { AppManager } from '../../AppManager'; -import type { AppLogStorage } from '../../storage'; +import type { AppLogStorage, IAppStorageItem } from '../../storage'; import type { AppBridges } from '../../bridges'; import type { IParseAppPackageResult } from '../../compiler'; +import { AppConsole, type ILoggerStorageEntry } from '../../logging'; import type { AppAccessorManager, AppApiManager } from '../../managers'; -import type { ILoggerStorageEntry } from '../../logging'; import { AppStatus } from '../../../definition/AppStatus'; import { bundleLegacyApp } from './bundler'; import { ProcessMessenger } from './ProcessMessenger'; @@ -103,7 +103,7 @@ export class DenoRuntimeSubprocessController extends EventEmitter { private readonly livenessManager: LivenessManager; // We need to keep the appSource around in case the Deno process needs to be restarted - constructor(manager: AppManager, private readonly appPackage: IParseAppPackageResult) { + constructor(manager: AppManager, private readonly appPackage: IParseAppPackageResult, private readonly storageItem: IAppStorageItem) { super(); this.debug = baseDebug.extend(appPackage.info.id); @@ -164,23 +164,38 @@ export class DenoRuntimeSubprocessController extends EventEmitter { } } - public async killProcess(): Promise { + /** + * Attempts to kill the process currently controlled by this.deno + * + * @returns boolean - if a process has been killed or not + */ + public async killProcess(): Promise { + if (!this.deno) { + this.debug('No child process reference'); + return false; + } + + let { killed } = this.deno; + // This field is not populated if the process is killed by the OS - if (this.deno.killed) { + if (killed) { this.debug('App process was already killed'); - return; + return killed; } // What else should we do? if (this.deno.kill('SIGKILL')) { // Let's wait until we get confirmation the process exited await new Promise((r) => this.deno.on('exit', r)); + killed = true; } else { this.debug('Tried killing the process but failed. Was it already dead?'); + killed = false; } delete this.deno; this.messenger.clearReceiver(); + return killed; } // Debug purposes, could be deleted later @@ -231,19 +246,49 @@ export class DenoRuntimeSubprocessController extends EventEmitter { public async restartApp() { this.debug('Restarting app subprocess'); + const logger = new AppConsole('runtime:restart'); + + logger.info('Starting restart procedure for app subprocess...', this.livenessManager.getRuntimeData()); this.state = 'restarting'; - await this.killProcess(); + try { + const pid = this.deno?.pid; - await this.setupApp(); + const hasKilled = await this.killProcess(); - // setupApp() changes the state to 'ready' - we'll need to workaround that for now - this.state = 'restarting'; + if (hasKilled) { + logger.debug('Process successfully terminated', { pid }); + } else { + logger.warn('Could not terminate process. Maybe it was already dead?', { pid }); + } - await this.sendRequest({ method: 'app:initialize' }); + await this.setupApp(); + logger.info('New subprocess successfully spawned', { pid: this.deno.pid }); - this.state = 'ready'; + // setupApp() changes the state to 'ready' - we'll need to workaround that for now + this.state = 'restarting'; + + // When an app has just been installed, the status in the storageItem passed to this controller will be "initialized" + // So, whenever we get that value here, let's just make it 'auto_enabled' + let status = this.storageItem.status; + + if (status === AppStatus.INITIALIZED) { + logger.info('Stored status was "initialized". Changing to "auto_enabled"'); + status = AppStatus.AUTO_ENABLED; + } + + await this.sendRequest({ method: 'app:initialize' }); + await this.sendRequest({ method: 'app:setStatus', params: [status] }); + + this.state = 'ready'; + + logger.info('Successfully restarted app subprocess'); + } catch (e) { + logger.error("Failed to restart app's subprocess", { error: e }); + } finally { + await this.logStorage.storeEntries(AppConsole.toStorageEntry(this.getAppId(), logger)); + } } public getAppId(): string { diff --git a/src/server/runtime/deno/LivenessManager.ts b/src/server/runtime/deno/LivenessManager.ts index 4fb2433e8..dc89acc87 100644 --- a/src/server/runtime/deno/LivenessManager.ts +++ b/src/server/runtime/deno/LivenessManager.ts @@ -10,7 +10,7 @@ const defaultOptions: LivenessManager['options'] = { pingRequestTimeout: 10000, pingFrequencyInMS: 10000, consecutiveTimeoutLimit: 4, - maxRestarts: 3, + maxRestarts: Infinity, }; /** @@ -65,6 +65,16 @@ export class LivenessManager { this.options = Object.assign({}, defaultOptions, options); } + public getRuntimeData() { + const { restartCount, pingTimeoutConsecutiveCount, restartLog } = this; + + return { + restartCount, + pingTimeoutConsecutiveCount, + restartLog, + }; + } + public attach(deno: ChildProcess) { this.subprocess = deno; diff --git a/tests/server/runtime/DenoRuntimeSubprocessController.spec.ts b/tests/server/runtime/DenoRuntimeSubprocessController.spec.ts index 1cc1846ea..5c675805e 100644 --- a/tests/server/runtime/DenoRuntimeSubprocessController.spec.ts +++ b/tests/server/runtime/DenoRuntimeSubprocessController.spec.ts @@ -1,17 +1,19 @@ -import * as path from 'path'; import * as fs from 'fs/promises'; +import * as path from 'path'; import { TestFixture, Setup, Expect, AsyncTest, SpyOn, Any, AsyncSetupFixture, Teardown } from 'alsatian'; import type { SuccessObject } from 'jsonrpc-lite'; -import { AppAccessorManager, AppApiManager } from '../../../src/server/managers'; -import { TestInfastructureSetup } from '../../test-data/utilities'; -import { DenoRuntimeSubprocessController } from '../../../src/server/runtime/deno/AppsEngineDenoRuntime'; -import type { AppManager } from '../../../src/server/AppManager'; +import { AppStatus } from '../../../src/definition/AppStatus'; import { UserStatusConnection, UserType } from '../../../src/definition/users'; +import type { AppManager } from '../../../src/server/AppManager'; import type { IParseAppPackageResult } from '../../../src/server/compiler'; +import { AppAccessorManager, AppApiManager } from '../../../src/server/managers'; +import { DenoRuntimeSubprocessController } from '../../../src/server/runtime/deno/AppsEngineDenoRuntime'; +import type { IAppStorageItem } from '../../../src/server/storage'; +import { TestInfastructureSetup } from '../../test-data/utilities'; -@TestFixture('DenoRuntimeSubprocessController') +@TestFixture() export class DenuRuntimeSubprocessControllerTestFixture { private manager: AppManager; @@ -19,6 +21,8 @@ export class DenuRuntimeSubprocessControllerTestFixture { private appPackage: IParseAppPackageResult; + private appStorageItem: IAppStorageItem; + @AsyncSetupFixture public async fixture() { const infrastructure = new TestInfastructureSetup(); @@ -35,11 +39,16 @@ export class DenuRuntimeSubprocessControllerTestFixture { const appPackage = await fs.readFile(path.join(__dirname, '../../test-data/apps/hello-world-test_0.0.1.zip')); this.appPackage = await this.manager.getParser().unpackageApp(appPackage); + + this.appStorageItem = { + id: 'hello-world-test', + status: AppStatus.MANUALLY_ENABLED, + } as IAppStorageItem; } @Setup public setup() { - this.controller = new DenoRuntimeSubprocessController(this.manager, this.appPackage); + this.controller = new DenoRuntimeSubprocessController(this.manager, this.appPackage, this.appStorageItem); this.controller.setupApp(); } From 7d6647fb7bf4409417f0556cddd69accbc5eb947 Mon Sep 17 00:00:00 2001 From: Douglas Gubert Date: Sun, 10 Nov 2024 15:42:31 -0300 Subject: [PATCH 2/5] fix: deno runtime controller not handling undefined child process reference correctly RocketChat/Rocket.Chat#33494 --- src/server/runtime/deno/AppsEngineDenoRuntime.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server/runtime/deno/AppsEngineDenoRuntime.ts b/src/server/runtime/deno/AppsEngineDenoRuntime.ts index e66281427..dd64cdacb 100644 --- a/src/server/runtime/deno/AppsEngineDenoRuntime.ts +++ b/src/server/runtime/deno/AppsEngineDenoRuntime.ts @@ -215,7 +215,7 @@ export class DenoRuntimeSubprocessController extends EventEmitter { public async getStatus(): Promise { // If the process has been terminated, we can't get the status - if (this.deno.exitCode !== null) { + if (!this.deno || this.deno.exitCode !== null) { return AppStatus.UNKNOWN; } From 653da283931261200d75d903aa8f7a7fe52118f3 Mon Sep 17 00:00:00 2001 From: Douglas Gubert Date: Sun, 10 Nov 2024 15:43:37 -0300 Subject: [PATCH 3/5] Add reason field to restart logs --- src/server/runtime/deno/AppsEngineDenoRuntime.ts | 2 +- src/server/runtime/deno/LivenessManager.ts | 11 ++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/server/runtime/deno/AppsEngineDenoRuntime.ts b/src/server/runtime/deno/AppsEngineDenoRuntime.ts index dd64cdacb..00c07aa8a 100644 --- a/src/server/runtime/deno/AppsEngineDenoRuntime.ts +++ b/src/server/runtime/deno/AppsEngineDenoRuntime.ts @@ -271,7 +271,7 @@ export class DenoRuntimeSubprocessController extends EventEmitter { // When an app has just been installed, the status in the storageItem passed to this controller will be "initialized" // So, whenever we get that value here, let's just make it 'auto_enabled' - let status = this.storageItem.status; + let { status } = this.storageItem; if (status === AppStatus.INITIALIZED) { logger.info('Stored status was "initialized". Changing to "auto_enabled"'); diff --git a/src/server/runtime/deno/LivenessManager.ts b/src/server/runtime/deno/LivenessManager.ts index dc89acc87..2450b3f2a 100644 --- a/src/server/runtime/deno/LivenessManager.ts +++ b/src/server/runtime/deno/LivenessManager.ts @@ -132,7 +132,7 @@ export class LivenessManager { if (reason === 'timeout' && this.pingTimeoutConsecutiveCount >= this.options.consecutiveTimeoutLimit) { this.debug('Subprocess failed to respond to pings %d consecutive times. Attempting restart...', this.options.consecutiveTimeoutLimit); - this.restartProcess(); + this.restartProcess('Too many pings timed out'); return false; } @@ -164,17 +164,21 @@ export class LivenessManager { return; } + let reason: string; + // Otherwise we try to restart the subprocess, if possible if (signal) { this.debug('App has been killed (%s). Attempting restart #%d...', signal, this.restartCount + 1); + reason = `App has been killed with signal ${signal}`; } else { this.debug('App has exited with code %d. Attempting restart #%d...', exitCode, this.restartCount + 1); + reason = `App has exited with code ${exitCode}`; } - this.restartProcess(); + this.restartProcess(reason); } - private restartProcess() { + private restartProcess(reason: string) { if (this.restartCount >= this.options.maxRestarts) { this.debug('Limit of restarts reached (%d). Aborting restart...', this.options.maxRestarts); this.controller.stopApp(); @@ -184,6 +188,7 @@ export class LivenessManager { this.pingTimeoutConsecutiveCount = 0; this.restartCount++; this.restartLog.push({ + reason, restartedAt: new Date(), source: 'liveness-manager', pid: this.subprocess.pid, From a04ca3ab2062ccdffb8f84d30876832ea2a82fef Mon Sep 17 00:00:00 2001 From: Douglas Gubert Date: Sun, 10 Nov 2024 16:52:12 -0300 Subject: [PATCH 4/5] Bump upload-artifact action --- .github/workflows/build_and_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index d6278d8f2..87cbed4fe 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -65,7 +65,7 @@ jobs: run: | tar czf /tmp/workspace.tar.gz . - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v4 with: name: workspace path: /tmp/workspace.tar.gz @@ -140,7 +140,7 @@ jobs: run: | tar czf /tmp/workspace.tar.gz . - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v4 with: name: workspace path: /tmp/workspace.tar.gz From 7042b29e58a47cd47d304697171aa5de8838c747 Mon Sep 17 00:00:00 2001 From: Douglas Gubert Date: Sun, 10 Nov 2024 16:57:46 -0300 Subject: [PATCH 5/5] Bump download-artifact action --- .github/workflows/build_and_test.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 87cbed4fe..9f65042d3 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -80,7 +80,7 @@ jobs: with: node-version: '14.19.3' - - uses: actions/download-artifact@v2 + - uses: actions/download-artifact@v4 with: name: workspace path: /tmp @@ -102,7 +102,7 @@ jobs: with: node-version: '14.19.3' - - uses: actions/download-artifact@v2 + - uses: actions/download-artifact@v4 with: name: workspace path: /tmp @@ -124,7 +124,7 @@ jobs: with: node-version: '14.19.3' - - uses: actions/download-artifact@v2 + - uses: actions/download-artifact@v4 with: name: workspace path: /tmp @@ -144,6 +144,7 @@ jobs: with: name: workspace path: /tmp/workspace.tar.gz + overwrite: true publish: runs-on: ubuntu-latest @@ -156,7 +157,7 @@ jobs: with: node-version: '14.19.3' - - uses: actions/download-artifact@v2 + - uses: actions/download-artifact@v4 with: name: workspace path: /tmp