From 9fa7dbbd8488416d58b237306008bd6e01725eb1 Mon Sep 17 00:00:00 2001 From: Yurij Mikhalevich Date: Sat, 30 Nov 2024 20:03:45 +0400 Subject: [PATCH] feat: start collecting engine metrics (#86) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## How does this PR impact the user? No direct impact. This will help us debug performance issues :) ## Description image ## Limitations - we most likely will want to get rid of per-user metric at some point; but, for now, it's fine 😄 ## Checklist - [x] my PR is focused and contains one wholistic change - [x] I have added screenshots or screen recordings to show the changes --- package-lock.json | 34 ++++++++++++++++++++++++++++++++++ package.json | 1 + server/middleware/auth.ts | 9 ++++++++- server/other/engineMetrics.ts | 35 +++++++++++++++++++++++++++++++++++ server/plugins/engine.ts | 22 +++++++++++++++++++++- server/routes/metrics.ts | 12 ++++++++++++ 6 files changed, 111 insertions(+), 2 deletions(-) create mode 100644 server/other/engineMetrics.ts create mode 100644 server/routes/metrics.ts diff --git a/package-lock.json b/package-lock.json index afee2aa..ba780bb 100644 --- a/package-lock.json +++ b/package-lock.json @@ -15,6 +15,7 @@ "isolated-vm": "^4.6.0", "lru-cache": "^10.2.0", "pixi.js": "^8.3.4", + "prom-client": "^15.1.3", "zod": "^3.22.4" }, "devDependencies": { @@ -2458,6 +2459,14 @@ "dev": true, "license": "MIT" }, + "node_modules/@opentelemetry/api": { + "version": "1.9.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.0.tgz", + "integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==", + "engines": { + "node": ">=8.0.0" + } + }, "node_modules/@parcel/watcher": { "version": "2.5.0", "resolved": "https://registry.npmjs.org/@parcel/watcher/-/watcher-2.5.0.tgz", @@ -5150,6 +5159,11 @@ "file-uri-to-path": "1.0.0" } }, + "node_modules/bintrees": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/bintrees/-/bintrees-1.0.2.tgz", + "integrity": "sha512-VOMgTMwjAaUG580SXn3LacVgjurrbMme7ZZNYGSSV7mmtY6QQRh0Eg3pwIcntQ77DErK1L0NxkbetjcoXzVwKw==" + }, "node_modules/birpc": { "version": "0.2.19", "resolved": "https://registry.npmjs.org/birpc/-/birpc-0.2.19.tgz", @@ -12811,6 +12825,18 @@ "dev": true, "license": "MIT" }, + "node_modules/prom-client": { + "version": "15.1.3", + "resolved": "https://registry.npmjs.org/prom-client/-/prom-client-15.1.3.tgz", + "integrity": "sha512-6ZiOBfCywsD4k1BN9IX0uZhF+tJkV8q8llP64G5Hajs4JOeVLPCwpPVcpXy3BwYiUGgyJzsJJQeOIv7+hDSq8g==", + "dependencies": { + "@opentelemetry/api": "^1.4.0", + "tdigest": "^0.1.1" + }, + "engines": { + "node": "^16 || ^18 || >=20" + } + }, "node_modules/prompts": { "version": "2.4.2", "resolved": "https://registry.npmjs.org/prompts/-/prompts-2.4.2.tgz", @@ -14622,6 +14648,14 @@ "node": ">=8" } }, + "node_modules/tdigest": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/tdigest/-/tdigest-0.1.2.tgz", + "integrity": "sha512-+G0LLgjjo9BZX2MfdvPfH+MKLCrxlXSYec5DaPYP1fe6Iyhf0/fSmJ0bFiZ1F8BT6cGXl2LpltQptzjXKWEkKA==", + "dependencies": { + "bintrees": "1.0.2" + } + }, "node_modules/terser": { "version": "5.36.0", "resolved": "https://registry.npmjs.org/terser/-/terser-5.36.0.tgz", diff --git a/package.json b/package.json index 89cdd7c..0dc31a7 100644 --- a/package.json +++ b/package.json @@ -61,6 +61,7 @@ "isolated-vm": "^4.6.0", "lru-cache": "^10.2.0", "pixi.js": "^8.3.4", + "prom-client": "^15.1.3", "zod": "^3.22.4" } } diff --git a/server/middleware/auth.ts b/server/middleware/auth.ts index daac12e..d76e259 100644 --- a/server/middleware/auth.ts +++ b/server/middleware/auth.ts @@ -1,8 +1,15 @@ import prisma from "~/other/db"; import { SESSION_COOKIE_NAME, SESSION_STORAGE } from "~/other/sessionStorage"; +const PUBLIC_PATHS = [ + "/api/auth/login", + "/login", + + "/metrics", +]; + export default defineEventHandler(async (event) => { - if (event.path.includes("login")) { + if (PUBLIC_PATHS.includes(event.path)) { return; } diff --git a/server/other/engineMetrics.ts b/server/other/engineMetrics.ts new file mode 100644 index 0000000..bbc688c --- /dev/null +++ b/server/other/engineMetrics.ts @@ -0,0 +1,35 @@ +import promClient from "prom-client"; + +export const gameStepTimeMs = new promClient.Histogram({ + name: "game_step_time_ms", + help: "Time all bots took to make a move and engine took to process them", + buckets: [200, 250], +}); + +export const botCodeRunTimeMs = new promClient.Histogram({ + name: "bot_code_run_time_ms", + help: "Time a bot took to make a move", + buckets: [50, 75], + labelNames: ["username"] as const, +}); + +export const gameStepCodeRunTimeMs = new promClient.Histogram({ + name: "game_step_code_run_time_ms", + help: "Time all bots took to make a move", + buckets: [50, 100, 150], +}); + +export const gameStepMoveBotTimeMs = new promClient.Summary({ + name: "game_step_move_bot_time_ms", + help: "Time it took to move all bots", +}); + +export const gameStepCollisionCheckTimeMs = new promClient.Summary({ + name: "game_step_collision_check_time_ms", + help: "Time it took to check for collisions", +}); + +export const gameStepFoodSpawnTimeMs = new promClient.Summary({ + name: "game_step_food_spawn_time_ms", + help: "Time it took to spawn food", +}); diff --git a/server/plugins/engine.ts b/server/plugins/engine.ts index 2362b8a..cdd8e05 100644 --- a/server/plugins/engine.ts +++ b/server/plugins/engine.ts @@ -1,3 +1,4 @@ +import { gameStepCodeRunTimeMs, botCodeRunTimeMs, gameStepTimeMs, gameStepMoveBotTimeMs, gameStepCollisionCheckTimeMs, gameStepFoodSpawnTimeMs } from "../other/engineMetrics"; import prepareBotCode from "~/other/prepareBotCode"; import * as botCodeStore from "~/other/botCodeStore"; import World, { type WorldState } from "~/other/world"; @@ -36,12 +37,21 @@ async function runBots({ bots, world, prevBotState, botApi }: RunBotArgs) { botApi, })); + const stopAllBotCodeRuntimeTimer = gameStepCodeRunTimeMs.startTimer(); const botActions = await Promise.all(preparedBotCodes.map(async (preparedCode, i) => { const codeRunner = codeRunners[i % codeRunners.length]; if (!codeRunner) { throw new Error("unexpected: codeRunner is undefined"); } + const bot = botArray[i]; + if (!bot) { + throw new Error("unexpected: bot is undefined"); + } + + const stopBotCodeRuntimeTimer = botCodeRunTimeMs.startTimer({ + username: bot.username, + }); try { const result = await codeRunner.runCode(preparedCode); return JSON.parse(result); @@ -49,22 +59,31 @@ async function runBots({ bots, world, prevBotState, botApi }: RunBotArgs) { // TODO(yurij): notify user that their bot crashed console.error(err); return []; + } finally { + stopBotCodeRuntimeTimer(); } })); + stopAllBotCodeRuntimeTimer(); + const stopMoveBotTimeTimer = gameStepMoveBotTimeMs.startTimer(); for (const [i, actions] of botActions.entries()) { const botId = botArray[i]?.id; if (botId && actions?.[0]?.type === "move") { world.moveBot(botId, actions[0].x, actions[0].y); } } + stopMoveBotTimeTimer(); + const stopCollisionCheckTimeTimer = gameStepCollisionCheckTimeMs.startTimer(); let collisionCheckLimit = 5; while (world.checkCollisions() && --collisionCheckLimit) { continue; } + stopCollisionCheckTimeTimer(); + const stopSpawnFoodTimeTimer = gameStepFoodSpawnTimeMs.startTimer(); world.spawnFood(); + stopSpawnFoodTimeTimer(); } type StartEngineArgs = { @@ -107,8 +126,9 @@ async function startEngine({ botApi }: StartEngineArgs) { } const endTs = Date.now(); - const timeSpentMs = endTs - startTs; + gameStepTimeMs.observe(timeSpentMs); + if (timeSpentMs < GAME_STEP_INTERVAL_MS) { await new Promise(resolve => setTimeout(resolve, GAME_STEP_INTERVAL_MS - timeSpentMs)); } else { diff --git a/server/routes/metrics.ts b/server/routes/metrics.ts new file mode 100644 index 0000000..4881b4c --- /dev/null +++ b/server/routes/metrics.ts @@ -0,0 +1,12 @@ +import promClient from "prom-client"; + +promClient.collectDefaultMetrics(); + +export default defineEventHandler(async () => { + const metrics = await promClient.register.metrics(); + return new Response(metrics, { + headers: { + "Content-Type": promClient.register.contentType, + }, + }); +});