From c9886d9dd86bea13b85fe0335a96c8ca24a30fd9 Mon Sep 17 00:00:00 2001 From: erensanlier Date: Fri, 31 May 2024 01:42:27 +0300 Subject: [PATCH] feat: checkpoint selecting strategy on sync-store getEvents (#907) * implements checkpoint selecting strategy and improved sql generation * simplify max checkpoint selection * use offset instead of limit to improve performance * Revert "use offset instead of limit to improve performance" This reverts commit 705ff40e72470bc588136110d401d4da89c8cc59. * checkpoint limit adjustment * Revert "checkpoint limit adjustment" This reverts commit 6fe6e812f48e3cd899816dfca741fccf65efe8a0. * reintroduces offset selection * add sqlite and tweaks * changeset --------- Co-authored-by: typedarray <90073088+0xOlias@users.noreply.github.com> --- .changeset/lemon-dingos-leave.md | 5 ++ .../core/src/sync-store/postgres/store.ts | 60 ++++++++++++++++++- packages/core/src/sync-store/sqlite/store.ts | 60 ++++++++++++++++++- 3 files changed, 123 insertions(+), 2 deletions(-) create mode 100644 .changeset/lemon-dingos-leave.md diff --git a/.changeset/lemon-dingos-leave.md b/.changeset/lemon-dingos-leave.md new file mode 100644 index 000000000..847ae690f --- /dev/null +++ b/.changeset/lemon-dingos-leave.md @@ -0,0 +1,5 @@ +--- +"@ponder/core": patch +--- + +Improved the performance of an important internal SQL query (`getEvents`) for large apps. An app with ~5M rows in the `ponder_sync.logs` table saw a ~20x reduction in execution time for this query. Smaller apps will see a more modest improvement. 
diff --git a/packages/core/src/sync-store/postgres/store.ts b/packages/core/src/sync-store/postgres/store.ts index 5ff0b5a92..d27b6b9d8 100644 --- a/packages/core/src/sync-store/postgres/store.ts +++ b/packages/core/src/sync-store/postgres/store.ts @@ -1694,6 +1694,58 @@ export class PostgresSyncStore implements SyncStore { ) } )`, ) + .with("log_checkpoints", (db) => + db + .selectFrom("logs") + .where("logs.checkpoint", ">", cursor) + .where("logs.checkpoint", "<=", encodedToCheckpoint) + .orderBy("logs.checkpoint", "asc") + .offset(limit) + .limit(1) + .select("logs.checkpoint"), + ) + .with("block_checkpoints", (db) => + db + .selectFrom("blocks") + .where("blocks.checkpoint", ">", cursor) + .where("blocks.checkpoint", "<=", encodedToCheckpoint) + .orderBy("blocks.checkpoint", "asc") + .offset(limit) + .limit(1) + .select("blocks.checkpoint"), + ) + .with("call_trace_checkpoints", (db) => + db + .selectFrom("callTraces") + .where("callTraces.checkpoint", ">", cursor) + .where("callTraces.checkpoint", "<=", encodedToCheckpoint) + .orderBy("callTraces.checkpoint", "asc") + .offset(limit) + .limit(1) + .select("callTraces.checkpoint"), + ) + .with("max_checkpoint", (db) => + db + .selectFrom( + db + .selectFrom("log_checkpoints") + .select("checkpoint") + .unionAll( + db.selectFrom("block_checkpoints").select("checkpoint"), + ) + .unionAll( + db + .selectFrom("call_trace_checkpoints") + .select("checkpoint"), + ) + .as("all_checkpoints"), + ) + .select( + sql`coalesce(max(checkpoint), ${encodedToCheckpoint})`.as( + "max_checkpoint", + ), + ), + ) .with("events", (db) => db .selectFrom("logs") @@ -1931,7 +1983,13 @@ export class PostgresSyncStore implements SyncStore { ]), ) .where("events.checkpoint", ">", cursor) - .where("events.checkpoint", "<=", encodedToCheckpoint) + .where( + "events.checkpoint", + "<=", + // Use the theoretical max checkpoint across all source types + // For details, read https://github.com/ponder-sh/ponder/pull/907/files + sql`( select 
max_checkpoint from max_checkpoint )`, + ) .orderBy("events.checkpoint", "asc") .limit(limit + 1) .execute(); diff --git a/packages/core/src/sync-store/sqlite/store.ts b/packages/core/src/sync-store/sqlite/store.ts index 4b7605d7c..b6c118efb 100644 --- a/packages/core/src/sync-store/sqlite/store.ts +++ b/packages/core/src/sync-store/sqlite/store.ts @@ -1728,6 +1728,58 @@ export class SqliteSyncStore implements SyncStore { ) } )`, ) + .with("log_checkpoints", (db) => + db + .selectFrom("logs") + .where("logs.checkpoint", ">", cursor) + .where("logs.checkpoint", "<=", encodedToCheckpoint) + .orderBy("logs.checkpoint", "asc") + .offset(limit) + .limit(1) + .select("logs.checkpoint"), + ) + .with("block_checkpoints", (db) => + db + .selectFrom("blocks") + .where("blocks.checkpoint", ">", cursor) + .where("blocks.checkpoint", "<=", encodedToCheckpoint) + .orderBy("blocks.checkpoint", "asc") + .offset(limit) + .limit(1) + .select("blocks.checkpoint"), + ) + .with("call_trace_checkpoints", (db) => + db + .selectFrom("callTraces") + .where("callTraces.checkpoint", ">", cursor) + .where("callTraces.checkpoint", "<=", encodedToCheckpoint) + .orderBy("callTraces.checkpoint", "asc") + .offset(limit) + .limit(1) + .select("callTraces.checkpoint"), + ) + .with("max_checkpoint", (db) => + db + .selectFrom( + db + .selectFrom("log_checkpoints") + .select("checkpoint") + .unionAll( + db.selectFrom("block_checkpoints").select("checkpoint"), + ) + .unionAll( + db + .selectFrom("call_trace_checkpoints") + .select("checkpoint"), + ) + .as("all_checkpoints"), + ) + .select( + sql`coalesce(max(checkpoint), ${encodedToCheckpoint})`.as( + "max_checkpoint", + ), + ), + ) .with("events", (db) => db .selectFrom("logs") @@ -1970,7 +2022,13 @@ export class SqliteSyncStore implements SyncStore { ]), ) .where("events.checkpoint", ">", cursor) - .where("events.checkpoint", "<=", encodedToCheckpoint) + .where( + "events.checkpoint", + "<=", + // Use the theoretical max checkpoint across all source 
types + // For details, read https://github.com/ponder-sh/ponder/pull/907/files + sql`( select max_checkpoint from max_checkpoint )`, + ) .orderBy("events.checkpoint", "asc") .limit(limit + 1) .execute();