From 8aac2a631948a359b7295172d29e183999b37108 Mon Sep 17 00:00:00 2001 From: Ashok Menon Date: Sat, 12 Oct 2024 16:08:06 +0100 Subject: [PATCH 01/33] indexer-alt: initial commit --- Cargo.lock | 8 ++++++++ Cargo.toml | 1 + crates/sui-indexer-alt/Cargo.toml | 15 +++++++++++++++ crates/sui-indexer-alt/src/lib.rs | 0 crates/sui-indexer-alt/src/main.rs | 6 ++++++ 5 files changed, 30 insertions(+) create mode 100644 crates/sui-indexer-alt/Cargo.toml create mode 100644 crates/sui-indexer-alt/src/lib.rs create mode 100644 crates/sui-indexer-alt/src/main.rs diff --git a/Cargo.lock b/Cargo.lock index 893ce321ecba4..577a022c2418e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13783,6 +13783,14 @@ dependencies = [ "url", ] +[[package]] +name = "sui-indexer-alt" +version = "1.37.0" +dependencies = [ + "anyhow", + "sui-types", +] + [[package]] name = "sui-indexer-builder" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index f9ec2171a1322..155f56f3657ab 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -114,6 +114,7 @@ members = [ "crates/sui-graphql-rpc-client", "crates/sui-graphql-rpc-headers", "crates/sui-indexer", + "crates/sui-indexer-alt", "crates/sui-indexer-builder", "crates/sui-json", "crates/sui-json-rpc", diff --git a/crates/sui-indexer-alt/Cargo.toml b/crates/sui-indexer-alt/Cargo.toml new file mode 100644 index 0000000000000..41f357940d963 --- /dev/null +++ b/crates/sui-indexer-alt/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "sui-indexer-alt" +version.workspace = true +authors = ["Mysten Labs "] +license = "Apache-2.0" +publish = false +edition = "2021" + +[[bin]] +name = "sui-indexer-alt" +path = "src/main.rs" + +[dependencies] +anyhow.workspace = true +sui-types.workspace = true diff --git a/crates/sui-indexer-alt/src/lib.rs b/crates/sui-indexer-alt/src/lib.rs new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/crates/sui-indexer-alt/src/main.rs b/crates/sui-indexer-alt/src/main.rs new file mode 100644 index 0000000000000..21e6521ce878c --- /dev/null +++ b/crates/sui-indexer-alt/src/main.rs @@ -0,0 +1,6 @@ +// Copyright (c) Mysten Labs, Inc. +// SPDX-License-Identifier: Apache-2.0 + +fn main() { + println!("Hello, world!"); +} From 9535f9c3af4391aa0321a026ee6c7cfeb417caef Mon Sep 17 00:00:00 2001 From: Ashok Menon Date: Sat, 12 Oct 2024 16:15:13 +0100 Subject: [PATCH 02/33] indexer-alt: argument parsing --- Cargo.lock | 2 ++ crates/sui-indexer-alt/Cargo.toml | 3 +++ crates/sui-indexer-alt/src/args.rs | 10 ++++++++++ crates/sui-indexer-alt/src/lib.rs | 4 ++++ crates/sui-indexer-alt/src/main.rs | 7 ++++++- 5 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 crates/sui-indexer-alt/src/args.rs diff --git a/Cargo.lock b/Cargo.lock index 577a022c2418e..9e185207c931a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13788,7 +13788,9 @@ name = "sui-indexer-alt" version = "1.37.0" dependencies = [ "anyhow", + "clap", "sui-types", + "url", ] [[package]] diff --git a/crates/sui-indexer-alt/Cargo.toml b/crates/sui-indexer-alt/Cargo.toml index 41f357940d963..16514df6755b6 100644 --- a/crates/sui-indexer-alt/Cargo.toml +++ b/crates/sui-indexer-alt/Cargo.toml @@ -12,4 +12,7 @@ path = "src/main.rs" [dependencies] anyhow.workspace = true +clap.workspace = true +url.workspace = true + sui-types.workspace = true diff --git a/crates/sui-indexer-alt/src/args.rs b/crates/sui-indexer-alt/src/args.rs new file mode 100644 index 0000000000000..af4b73fb473e8 --- /dev/null +++ b/crates/sui-indexer-alt/src/args.rs @@ -0,0 +1,10 @@ +// Copyright (c) Mysten Labs, Inc. 
+// SPDX-License-Identifier: Apache-2.0 + +use url::Url; + +#[derive(clap::Parser, Debug, Clone)] +pub struct Args { + #[arg(long)] + pub remote_store_url: Url, +} diff --git a/crates/sui-indexer-alt/src/lib.rs b/crates/sui-indexer-alt/src/lib.rs index e69de29bb2d1d..d1d47f5b6c1f0 100644 --- a/crates/sui-indexer-alt/src/lib.rs +++ b/crates/sui-indexer-alt/src/lib.rs @@ -0,0 +1,4 @@ +// Copyright (c) Mysten Labs, Inc. +// SPDX-License-Identifier: Apache-2.0 + +pub mod args; diff --git a/crates/sui-indexer-alt/src/main.rs b/crates/sui-indexer-alt/src/main.rs index 21e6521ce878c..c2b6fb4b76f0f 100644 --- a/crates/sui-indexer-alt/src/main.rs +++ b/crates/sui-indexer-alt/src/main.rs @@ -1,6 +1,11 @@ // Copyright (c) Mysten Labs, Inc. // SPDX-License-Identifier: Apache-2.0 +use clap::Parser; +use sui_indexer_alt::args::Args; + fn main() { - println!("Hello, world!"); + let args = Args::parse(); + + println!("Hello, remote-store-url: {}!", args.remote_store_url); } From 4de178be1f75ecc98b28b280fb1f41a436aeb5f6 Mon Sep 17 00:00:00 2001 From: Ashok Menon Date: Sat, 12 Oct 2024 17:40:34 +0100 Subject: [PATCH 03/33] indexer-alt: Fetch checkpoints from remote store --- Cargo.lock | 5 ++ crates/sui-indexer-alt/Cargo.toml | 5 ++ crates/sui-indexer-alt/src/args.rs | 5 ++ crates/sui-indexer-alt/src/ingestion/mod.rs | 60 +++++++++++++++++++++ crates/sui-indexer-alt/src/lib.rs | 1 + crates/sui-indexer-alt/src/main.rs | 38 +++++++++++-- 6 files changed, 111 insertions(+), 3 deletions(-) create mode 100644 crates/sui-indexer-alt/src/ingestion/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 9e185207c931a..a88d96cccd668 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13788,8 +13788,13 @@ name = "sui-indexer-alt" version = "1.37.0" dependencies = [ "anyhow", + "bcs", "clap", + "reqwest 0.12.5", + "serde", + "sui-storage", "sui-types", + "tokio", "url", ] diff --git a/crates/sui-indexer-alt/Cargo.toml b/crates/sui-indexer-alt/Cargo.toml index 16514df6755b6..e3f3b7f425c30 100644 --- a/crates/sui-indexer-alt/Cargo.toml +++ b/crates/sui-indexer-alt/Cargo.toml @@ -12,7 +12,12 @@ path = "src/main.rs" [dependencies] anyhow.workspace = true +bcs.workspace = true clap.workspace = true +reqwest.workspace = true +serde.workspace = true +tokio.workspace = true url.workspace = true +sui-storage.workspace = true sui-types.workspace = true diff --git a/crates/sui-indexer-alt/src/args.rs b/crates/sui-indexer-alt/src/args.rs index af4b73fb473e8..0b9ca9fd1e2f9 100644 --- a/crates/sui-indexer-alt/src/args.rs +++ b/crates/sui-indexer-alt/src/args.rs @@ -5,6 +5,11 @@ use url::Url; #[derive(clap::Parser, Debug, Clone)] pub struct Args { + /// First checkpoint to start indexing from. + #[arg(long, default_value_t = 0)] + pub start: u64, + + /// Remote Store to fetch CheckpointData from. #[arg(long)] pub remote_store_url: Url, } diff --git a/crates/sui-indexer-alt/src/ingestion/mod.rs b/crates/sui-indexer-alt/src/ingestion/mod.rs new file mode 100644 index 0000000000000..b8d7fe3ad65d3 --- /dev/null +++ b/crates/sui-indexer-alt/src/ingestion/mod.rs @@ -0,0 +1,60 @@ +// Copyright (c) Mysten Labs, Inc. 
+// SPDX-License-Identifier: Apache-2.0 + +use std::sync::Arc; + +use anyhow::{bail, Context, Result}; +use reqwest::Client; +use sui_storage::blob::Blob; +use sui_types::full_checkpoint_content::CheckpointData; +use url::Url; + +pub struct IngestionClient { + url: Url, + client: Client, +} + +impl IngestionClient { + pub fn new(url: Url) -> Result { + Ok(Self { + url, + client: Client::builder().build()?, + }) + } + + pub async fn fetch(&self, checkpoint: u64) -> Result> { + // SAFETY: The path being joined is statically known to be valid. + let url = self + .url + .join(&format!("/{checkpoint}.chk")) + .expect("Unexpected invalid URL"); + + let response = self + .client + .get(url) + .send() + .await + .with_context(|| format!("Failed to fetch checkpoint {checkpoint}"))?; + + if !response.status().is_success() { + if let Some(reason) = response.status().canonical_reason() { + bail!("Failed to fetch checkpoint {checkpoint}: {reason}"); + } else { + bail!( + "Failed to fetch checkpoint {checkpoint}: {}", + response.status() + ); + } + } + + let bytes = response + .bytes() + .await + .with_context(|| format!("Failed to read checkpoint {checkpoint}"))?; + + let data: CheckpointData = Blob::from_bytes(&bytes) + .with_context(|| format!("Failed to decode checkpoint {checkpoint}"))?; + + Ok(Arc::new(data)) + } +} diff --git a/crates/sui-indexer-alt/src/lib.rs b/crates/sui-indexer-alt/src/lib.rs index d1d47f5b6c1f0..958abc017b361 100644 --- a/crates/sui-indexer-alt/src/lib.rs +++ b/crates/sui-indexer-alt/src/lib.rs @@ -2,3 +2,4 @@ // SPDX-License-Identifier: Apache-2.0 pub mod args; +pub mod ingestion; diff --git a/crates/sui-indexer-alt/src/main.rs b/crates/sui-indexer-alt/src/main.rs index c2b6fb4b76f0f..174cf8cc1e7cd 100644 --- a/crates/sui-indexer-alt/src/main.rs +++ b/crates/sui-indexer-alt/src/main.rs @@ -1,11 +1,43 @@ // Copyright (c) Mysten Labs, Inc. // SPDX-License-Identifier: Apache-2.0 +use anyhow::Result; use clap::Parser; -use sui_indexer_alt::args::Args; +use sui_indexer_alt::{args::Args, ingestion::IngestionClient}; -fn main() { +#[tokio::main] +async fn main() -> Result<()> { let args = Args::parse(); - println!("Hello, remote-store-url: {}!", args.remote_store_url); + println!("Fetching from {}", args.remote_store_url); + + let client = IngestionClient::new(args.remote_store_url)?; + + let checkpoint = client.fetch(args.start).await?; + println!( + "Fetch checkpoint {cp}:\n\ + #transactions = {txs}\n\ + #events = {evs}\n\ + #input-objects = {ins}\n\ + #output-objects = {outs}", + cp = args.start, + txs = checkpoint.transactions.len(), + evs = checkpoint + .transactions + .iter() + .map(|tx| tx.events.as_ref().map_or(0, |evs| evs.data.len())) + .sum::(), + ins = checkpoint + .transactions + .iter() + .map(|tx| tx.input_objects.len()) + .sum::(), + outs = checkpoint + .transactions + .iter() + .map(|tx| tx.output_objects.len()) + .sum::(), + ); + + Ok(()) } From 12c37e6375f9ae8afde72bb5cf2e16daeef364bf Mon Sep 17 00:00:00 2001 From: Ashok Menon Date: Sat, 12 Oct 2024 19:03:35 +0100 Subject: [PATCH 04/33] indexer-alt: detect transient errors and retry ## Description Add logic to detect specific errors and retry on just those, while surfacing client errors back up. The retry backoff is configured to not ever give up under an assumption that an ingestion pipeline is designed to keep chugging along regardless of what happens around it, and the world will shift around it to correct for transient errors. 
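To illustrate the policy, here is a minimal, self-contained sketch of the same pattern with the `backoff` and `reqwest` crates (the function name and the treatment of connection-level send errors are illustrative only; the actual implementation lives in `ingestion/mod.rs`):

```
use std::time::Duration;

use backoff::{Error as BE, ExponentialBackoff};
use reqwest::{Client, Response, StatusCode};
use url::Url;

/// Sketch: fetch `url`, retrying transient failures (timeouts, rate limiting, 5xx) forever,
/// with the wait between attempts capped at 60 seconds. All other HTTP errors are surfaced
/// to the caller immediately.
async fn fetch_with_retry(client: &Client, url: Url) -> Result<Response, StatusCode> {
    // Keep widening the interval up to the cap, but never give up.
    let policy = ExponentialBackoff {
        max_interval: Duration::from_secs(60),
        max_elapsed_time: None,
        ..Default::default()
    };

    backoff::future::retry(policy, move || {
        let url = url.clone();
        async move {
            let response = client
                .get(url)
                .send()
                .await
                // Connection-level failures are treated as transient in this sketch.
                .map_err(|_| BE::transient(StatusCode::SERVICE_UNAVAILABLE))?;

            match response.status() {
                code if code.is_success() => Ok(response),

                // Timeouts and rate limiting are client errors, but usually transient.
                code @ StatusCode::REQUEST_TIMEOUT => Err(BE::transient(code)),
                code @ StatusCode::TOO_MANY_REQUESTS => Err(BE::transient(code)),

                // Assume the server will eventually recover from its own errors.
                code if code.is_server_error() => Err(BE::transient(code)),

                // Everything else is permanent: surface it to the caller, don't retry.
                code => Err(BE::permanent(code)),
            }
        }
    })
    .await
}
```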
## Test plan New unit tests: ``` sui$ cargo nextest run -p sui-indexer-alt ``` --- Cargo.lock | 118 +++++++++++++ Cargo.toml | 1 + crates/sui-indexer-alt/Cargo.toml | 5 + crates/sui-indexer-alt/src/ingestion/mod.rs | 175 +++++++++++++++++--- 4 files changed, 274 insertions(+), 25 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a88d96cccd668..27f3687059bfc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -794,6 +794,16 @@ dependencies = [ "syn 1.0.107", ] +[[package]] +name = "assert-json-diff" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e4f2b81832e72834d7518d8487a0396a28cc408186a2e8854c0f98011faf12" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "assert_cmd" version = "2.0.7" @@ -808,6 +818,17 @@ dependencies = [ "wait-timeout", ] +[[package]] +name = "async-channel" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81953c529336010edd6d8e358f886d9581267795c61b19475b71314bffa46d35" +dependencies = [ + "concurrent-queue", + "event-listener", + "futures-core", +] + [[package]] name = "async-compression" version = "0.4.6" @@ -2703,6 +2724,15 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "concurrent-queue" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "consensus-config" version = "0.1.0" @@ -3533,6 +3563,25 @@ dependencies = [ "walkdir", ] +[[package]] +name = "deadpool" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "421fe0f90f2ab22016f32a9881be5134fdd71c65298917084b0c7477cbc3856e" +dependencies = [ + "async-trait", + "deadpool-runtime", + "num_cpus", + "retain_mut", + "tokio", +] + +[[package]] +name = "deadpool-runtime" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "092966b41edc516079bdf31ec78a2e0588d1d0c08f78b91d8307215928642b2b" + [[package]] name = "debug-ignore" version = "1.0.5" @@ -5726,6 +5775,27 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08a397c49fec283e3d6211adbe480be95aae5f304cfb923e9970e08956d5168a" +[[package]] +name = "http-types" +version = "2.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e9b187a72d63adbfba487f48095306ac823049cb504ee195541e91c7775f5ad" +dependencies = [ + "anyhow", + "async-channel", + "base64 0.13.1", + "futures-lite", + "http 0.2.9", + "infer", + "pin-project-lite", + "rand 0.7.3", + "serde", + "serde_json", + "serde_qs", + "serde_urlencoded", + "url", +] + [[package]] name = "httparse" version = "1.8.0" @@ -6073,6 +6143,12 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "infer" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64e9829a50b42bb782c1df523f78d332fe371b10c661e78b7a3c34b0198e9fac" + [[package]] name = "inotify" version = "0.9.6" @@ -10813,6 +10889,12 @@ dependencies = [ "wasm-timer", ] +[[package]] +name = "retain_mut" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4389f1d5789befaf6029ebd9f7dac4af7f7e3d61b69d4f30e2ac02b57e7712b0" + [[package]] name = "retry-policies" version = "0.3.0" @@ -11765,6 +11847,17 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_qs" +version = "0.8.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7715380eec75f029a4ef7de39a9200e0a63823176b759d055b613f5a87df6a6" +dependencies = [ + "percent-encoding", + "serde", + "thiserror", +] + [[package]] name = "serde_repr" version = "0.1.10" @@ -13788,14 +13881,17 @@ name = "sui-indexer-alt" version = "1.37.0" dependencies = [ "anyhow", + "backoff", "bcs", "clap", "reqwest 0.12.5", "serde", "sui-storage", "sui-types", + "thiserror", "tokio", "url", + "wiremock", ] [[package]] @@ -17590,6 +17686,28 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "wiremock" +version = "0.5.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13a3a53eaf34f390dd30d7b1b078287dd05df2aa2e21a589ccb80f5c7253c2e9" +dependencies = [ + "assert-json-diff", + "async-trait", + "base64 0.21.7", + "deadpool", + "futures", + "futures-timer", + "http-types", + "hyper 0.14.26", + "log", + "once_cell", + "regex", + "serde", + "serde_json", + "tokio", +] + [[package]] name = "ws_stream_wasm" version = "0.7.4" diff --git a/Cargo.toml b/Cargo.toml index 155f56f3657ab..642dfe11f885b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -541,6 +541,7 @@ webpki = { version = "0.102", package = "rustls-webpki", features = [ "alloc", "std", ] } +wiremock = "0.5" x509-parser = "0.14.0" zstd = "0.12.3" zeroize = "1.6.0" diff --git a/crates/sui-indexer-alt/Cargo.toml b/crates/sui-indexer-alt/Cargo.toml index e3f3b7f425c30..2e57e644d1a66 100644 --- a/crates/sui-indexer-alt/Cargo.toml +++ b/crates/sui-indexer-alt/Cargo.toml @@ -12,12 +12,17 @@ path = "src/main.rs" [dependencies] anyhow.workspace = true +backoff.workspace = true bcs.workspace = true clap.workspace = true reqwest.workspace = true serde.workspace = true +thiserror.workspace = true tokio.workspace = true url.workspace = true sui-storage.workspace = true sui-types.workspace = true + +[dev-dependencies] +wiremock.workspace = true diff --git a/crates/sui-indexer-alt/src/ingestion/mod.rs b/crates/sui-indexer-alt/src/ingestion/mod.rs index b8d7fe3ad65d3..6170ce66d4e1a 100644 --- a/crates/sui-indexer-alt/src/ingestion/mod.rs +++ b/crates/sui-indexer-alt/src/ingestion/mod.rs @@ -1,19 +1,39 @@ // Copyright (c) Mysten Labs, Inc. // SPDX-License-Identifier: Apache-2.0 -use std::sync::Arc; +use std::{sync::Arc, time::Duration}; -use anyhow::{bail, Context, Result}; -use reqwest::Client; +use backoff::ExponentialBackoff; +use reqwest::{Client, StatusCode}; use sui_storage::blob::Blob; use sui_types::full_checkpoint_content::CheckpointData; use url::Url; +/// Wait at most this long between retries for transient errors. +const MAX_RETRY_INTERVAL: Duration = Duration::from_secs(60); + pub struct IngestionClient { url: Url, client: Client, } +type Result = std::result::Result; + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("Checkpoint {0} not found")] + NotFound(u64), + + #[error("Failed to deserialize checkpoint {0}: {1}")] + DeserializationError(u64, #[source] anyhow::Error), + + #[error("Failed to fetch checkpoint {0}: {1}")] + HttpError(u64, StatusCode), + + #[error(transparent)] + ReqwestError(#[from] reqwest::Error), +} + impl IngestionClient { pub fn new(url: Url) -> Result { Ok(Self { @@ -22,6 +42,9 @@ impl IngestionClient { }) } + /// Fetch a checkpoint from the remote store. Repeatedly retries transient errors with an + /// exponential backoff (up to [MAX_RETRY_INTERVAL]), but will immediately return + /// non-transient errors, which include all client errors, except timeouts and rate limiting. 
pub async fn fetch(&self, checkpoint: u64) -> Result> { // SAFETY: The path being joined is statically known to be valid. let url = self @@ -29,32 +52,134 @@ impl IngestionClient { .join(&format!("/{checkpoint}.chk")) .expect("Unexpected invalid URL"); - let response = self - .client - .get(url) - .send() - .await - .with_context(|| format!("Failed to fetch checkpoint {checkpoint}"))?; - - if !response.status().is_success() { - if let Some(reason) = response.status().canonical_reason() { - bail!("Failed to fetch checkpoint {checkpoint}: {reason}"); - } else { - bail!( - "Failed to fetch checkpoint {checkpoint}: {}", - response.status() - ); + let request = move || { + let url = url.clone(); + async move { + let response = self + .client + .get(url) + .send() + .await + .expect("Unexpected error building request"); + + use backoff::Error as BE; + match response.status() { + code if code.is_success() => Ok(response), + + // Treat 404s as a special case so we can match on this error type. + StatusCode::NOT_FOUND => Err(BE::Permanent(Error::NotFound(checkpoint))), + + // Timeouts are a client error but they are usually transient. + code @ StatusCode::REQUEST_TIMEOUT => { + Err(BE::transient(Error::HttpError(checkpoint, code))) + } + + // Rate limiting is also a client error, but the backoff will eventually widen the + // interval appropriately. + code @ StatusCode::TOO_MANY_REQUESTS => { + Err(BE::transient(Error::HttpError(checkpoint, code))) + } + + // Assume that if the server is facing difficulties, it will recover eventually. + code if code.is_server_error() => { + Err(BE::transient(Error::HttpError(checkpoint, code))) + } + + // For everything else, assume it's a permanent error and don't retry. + code => Err(BE::Permanent(Error::HttpError(checkpoint, code))), + } } - } + }; + + // Keep backing off until we are waiting for the max interval, but don't give up. + let backoff = ExponentialBackoff { + max_interval: MAX_RETRY_INTERVAL, + max_elapsed_time: None, + ..Default::default() + }; - let bytes = response + let bytes = backoff::future::retry(backoff, request) + .await? 
.bytes() - .await - .with_context(|| format!("Failed to read checkpoint {checkpoint}"))?; + .await?; + + Ok(Arc::new( + Blob::from_bytes(&bytes).map_err(|e| Error::DeserializationError(checkpoint, e))?, + )) + } +} + +#[cfg(test)] +mod tests { + use std::sync::Mutex; + + use wiremock::{ + matchers::{method, path_regex}, + Mock, MockServer, Request, Respond, ResponseTemplate, + }; + + use super::*; + + async fn respond_with(server: &MockServer, response: impl Respond + 'static) { + Mock::given(method("GET")) + .and(path_regex(r"/\d+.chk")) + .respond_with(response) + .mount(server) + .await; + } + + fn status(code: StatusCode) -> ResponseTemplate { + ResponseTemplate::new(code.as_u16()) + } + + #[tokio::test] + async fn not_found() { + let server = MockServer::start().await; + respond_with(&server, status(StatusCode::NOT_FOUND)).await; + + let client = IngestionClient::new(Url::parse(&server.uri()).unwrap()).unwrap(); + let error = client.fetch(42).await.unwrap_err(); + + assert!(matches!(error, Error::NotFound(42))); + } + + #[tokio::test] + async fn client_error() { + let server = MockServer::start().await; + respond_with(&server, status(StatusCode::IM_A_TEAPOT)).await; + + let client = IngestionClient::new(Url::parse(&server.uri()).unwrap()).unwrap(); + let error = client.fetch(42).await.unwrap_err(); + + assert!(matches!( + error, + Error::HttpError(42, StatusCode::IM_A_TEAPOT) + )); + } + + #[tokio::test] + async fn transient_server_error() { + let server = MockServer::start().await; + + let times: Mutex = Mutex::new(0); + respond_with(&server, move |_: &Request| { + let mut times = times.lock().unwrap(); + *times += 1; + status(match *times { + 1 => StatusCode::INTERNAL_SERVER_ERROR, + 2 => StatusCode::REQUEST_TIMEOUT, + 3 => StatusCode::TOO_MANY_REQUESTS, + _ => StatusCode::IM_A_TEAPOT, + }) + }) + .await; - let data: CheckpointData = Blob::from_bytes(&bytes) - .with_context(|| format!("Failed to decode checkpoint {checkpoint}"))?; + let client = IngestionClient::new(Url::parse(&server.uri()).unwrap()).unwrap(); + let error = client.fetch(42).await.unwrap_err(); - Ok(Arc::new(data)) + assert!(matches!( + error, + Error::HttpError(42, StatusCode::IM_A_TEAPOT) + )); } } From 879ae95ab9b734436a99e85b00c28266433fc6cb Mon Sep 17 00:00:00 2001 From: Ashok Menon Date: Sun, 13 Oct 2024 00:00:29 +0100 Subject: [PATCH 05/33] indexer-alt: tracing and telemetry ## Description Set-up the telmetry subscriber, and a prometheus metrics service. Instrument the `IngestionClient` to count statistics related to checkpoints being ingested. 
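As a rough sketch of the instrumentation pattern (the metric names below match this change; the helper function, buckets, and error handling are illustrative only):

```
use prometheus::{
    register_histogram_with_registry, register_int_counter_with_registry, Histogram, IntCounter,
    Registry,
};

/// Sketch: register the ingestion metrics against a dedicated registry. The metrics service
/// then serves this registry over HTTP at `--metrics-address`.
fn ingestion_metrics(registry: &Registry) -> anyhow::Result<(IntCounter, Histogram)> {
    let total_ingested_checkpoints = register_int_counter_with_registry!(
        "indexer_total_ingested_checkpoints",
        "Total number of checkpoints fetched from the remote store",
        registry,
    )?;

    let ingested_checkpoint_latency = register_histogram_with_registry!(
        "indexer_ingested_checkpoint_latency",
        "Time taken to fetch a checkpoint from the remote store, including retries",
        vec![0.01, 0.1, 1.0, 10.0], // illustrative buckets only
        registry,
    )?;

    Ok((total_ingested_checkpoints, ingested_checkpoint_latency))
}

// The `IngestionClient` then records, around each fetch:
//
//     let _guard = metrics.ingested_checkpoint_latency.start_timer(); // observed on drop
//     /* ... fetch and decode the checkpoint ... */
//     metrics.total_ingested_checkpoints.inc();
```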
## Test plan ``` sui$ cargo run -p sui-indexer-alt -- --remote-store-url https://checkpoints.mainnet.sui.io ``` --- Cargo.lock | 6 + crates/sui-indexer-alt/Cargo.toml | 6 + crates/sui-indexer-alt/src/args.rs | 6 + crates/sui-indexer-alt/src/ingestion/mod.rs | 61 +++++++- crates/sui-indexer-alt/src/lib.rs | 1 + crates/sui-indexer-alt/src/main.rs | 39 +++-- crates/sui-indexer-alt/src/metrics.rs | 152 ++++++++++++++++++++ 7 files changed, 253 insertions(+), 18 deletions(-) create mode 100644 crates/sui-indexer-alt/src/metrics.rs diff --git a/Cargo.lock b/Cargo.lock index 27f3687059bfc..68b19a75c46f7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13881,15 +13881,21 @@ name = "sui-indexer-alt" version = "1.37.0" dependencies = [ "anyhow", + "axum 0.7.5", "backoff", "bcs", "clap", + "mysten-metrics", + "prometheus", "reqwest 0.12.5", "serde", "sui-storage", "sui-types", + "telemetry-subscribers", "thiserror", "tokio", + "tokio-util 0.7.10", + "tracing", "url", "wiremock", ] diff --git a/crates/sui-indexer-alt/Cargo.toml b/crates/sui-indexer-alt/Cargo.toml index 2e57e644d1a66..12a08ec2ee0a7 100644 --- a/crates/sui-indexer-alt/Cargo.toml +++ b/crates/sui-indexer-alt/Cargo.toml @@ -12,15 +12,21 @@ path = "src/main.rs" [dependencies] anyhow.workspace = true +axum.workspace = true backoff.workspace = true bcs.workspace = true clap.workspace = true +prometheus.workspace = true reqwest.workspace = true serde.workspace = true +telemetry-subscribers.workspace = true thiserror.workspace = true tokio.workspace = true +tokio-util.workspace = true +tracing.workspace = true url.workspace = true +mysten-metrics.workspace = true sui-storage.workspace = true sui-types.workspace = true diff --git a/crates/sui-indexer-alt/src/args.rs b/crates/sui-indexer-alt/src/args.rs index 0b9ca9fd1e2f9..282bb847c25f3 100644 --- a/crates/sui-indexer-alt/src/args.rs +++ b/crates/sui-indexer-alt/src/args.rs @@ -1,6 +1,8 @@ // Copyright (c) Mysten Labs, Inc. // SPDX-License-Identifier: Apache-2.0 +use std::net::SocketAddr; + use url::Url; #[derive(clap::Parser, Debug, Clone)] @@ -12,4 +14,8 @@ pub struct Args { /// Remote Store to fetch CheckpointData from. #[arg(long)] pub remote_store_url: Url, + + /// Address to serve Prometheus Metrics from. + #[arg(long, default_value = "0.0.0.0:9184")] + pub metrics_address: SocketAddr, } diff --git a/crates/sui-indexer-alt/src/ingestion/mod.rs b/crates/sui-indexer-alt/src/ingestion/mod.rs index 6170ce66d4e1a..f9e7059a4255d 100644 --- a/crates/sui-indexer-alt/src/ingestion/mod.rs +++ b/crates/sui-indexer-alt/src/ingestion/mod.rs @@ -9,12 +9,15 @@ use sui_storage::blob::Blob; use sui_types::full_checkpoint_content::CheckpointData; use url::Url; +use crate::metrics::IndexerMetrics; + /// Wait at most this long between retries for transient errors. const MAX_RETRY_INTERVAL: Duration = Duration::from_secs(60); pub struct IngestionClient { url: Url, client: Client, + metrics: IndexerMetrics, } type Result = std::result::Result; @@ -35,10 +38,11 @@ pub enum Error { } impl IngestionClient { - pub fn new(url: Url) -> Result { + pub fn new(url: Url, metrics: IndexerMetrics) -> Result { Ok(Self { url, client: Client::builder().build()?, + metrics, }) } @@ -71,17 +75,20 @@ impl IngestionClient { // Timeouts are a client error but they are usually transient. code @ StatusCode::REQUEST_TIMEOUT => { + self.metrics.total_ingested_retries.inc(); Err(BE::transient(Error::HttpError(checkpoint, code))) } // Rate limiting is also a client error, but the backoff will eventually widen the // interval appropriately. 
code @ StatusCode::TOO_MANY_REQUESTS => { + self.metrics.total_ingested_retries.inc(); Err(BE::transient(Error::HttpError(checkpoint, code))) } // Assume that if the server is facing difficulties, it will recover eventually. code if code.is_server_error() => { + self.metrics.total_ingested_retries.inc(); Err(BE::transient(Error::HttpError(checkpoint, code))) } @@ -98,14 +105,48 @@ impl IngestionClient { ..Default::default() }; + let _guard = self.metrics.ingested_checkpoint_latency.start_timer(); + let bytes = backoff::future::retry(backoff, request) .await? .bytes() .await?; - Ok(Arc::new( - Blob::from_bytes(&bytes).map_err(|e| Error::DeserializationError(checkpoint, e))?, - )) + let checkpoint: CheckpointData = + Blob::from_bytes(&bytes).map_err(|e| Error::DeserializationError(checkpoint, e))?; + + self.metrics.total_ingested_checkpoints.inc(); + self.metrics.total_ingested_bytes.inc_by(bytes.len() as u64); + + self.metrics + .total_ingested_transactions + .inc_by(checkpoint.transactions.len() as u64); + + self.metrics.total_ingested_events.inc_by( + checkpoint + .transactions + .iter() + .map(|tx| tx.events.as_ref().map_or(0, |evs| evs.data.len()) as u64) + .sum(), + ); + + self.metrics.total_ingested_inputs.inc_by( + checkpoint + .transactions + .iter() + .map(|tx| tx.input_objects.len() as u64) + .sum(), + ); + + self.metrics.total_ingested_outputs.inc_by( + checkpoint + .transactions + .iter() + .map(|tx| tx.output_objects.len() as u64) + .sum(), + ); + + Ok(Arc::new(checkpoint)) } } @@ -118,6 +159,8 @@ mod tests { Mock, MockServer, Request, Respond, ResponseTemplate, }; + use crate::metrics::tests::test_metrics; + use super::*; async fn respond_with(server: &MockServer, response: impl Respond + 'static) { @@ -132,12 +175,16 @@ mod tests { ResponseTemplate::new(code.as_u16()) } + fn test_client(uri: String) -> IngestionClient { + IngestionClient::new(Url::parse(&uri).unwrap(), test_metrics()).unwrap() + } + #[tokio::test] async fn not_found() { let server = MockServer::start().await; respond_with(&server, status(StatusCode::NOT_FOUND)).await; - let client = IngestionClient::new(Url::parse(&server.uri()).unwrap()).unwrap(); + let client = test_client(server.uri()); let error = client.fetch(42).await.unwrap_err(); assert!(matches!(error, Error::NotFound(42))); @@ -148,7 +195,7 @@ mod tests { let server = MockServer::start().await; respond_with(&server, status(StatusCode::IM_A_TEAPOT)).await; - let client = IngestionClient::new(Url::parse(&server.uri()).unwrap()).unwrap(); + let client = test_client(server.uri()); let error = client.fetch(42).await.unwrap_err(); assert!(matches!( @@ -174,7 +221,7 @@ mod tests { }) .await; - let client = IngestionClient::new(Url::parse(&server.uri()).unwrap()).unwrap(); + let client = test_client(server.uri()); let error = client.fetch(42).await.unwrap_err(); assert!(matches!( diff --git a/crates/sui-indexer-alt/src/lib.rs b/crates/sui-indexer-alt/src/lib.rs index 958abc017b361..ed434c4f77687 100644 --- a/crates/sui-indexer-alt/src/lib.rs +++ b/crates/sui-indexer-alt/src/lib.rs @@ -3,3 +3,4 @@ pub mod args; pub mod ingestion; +pub mod metrics; diff --git a/crates/sui-indexer-alt/src/main.rs b/crates/sui-indexer-alt/src/main.rs index 174cf8cc1e7cd..54b186d62547e 100644 --- a/crates/sui-indexer-alt/src/main.rs +++ b/crates/sui-indexer-alt/src/main.rs @@ -1,26 +1,37 @@ // Copyright (c) Mysten Labs, Inc. 
// SPDX-License-Identifier: Apache-2.0 -use anyhow::Result; +use anyhow::{Context, Result}; use clap::Parser; -use sui_indexer_alt::{args::Args, ingestion::IngestionClient}; +use sui_indexer_alt::{args::Args, ingestion::IngestionClient, metrics::MetricsService}; +use tokio::signal; +use tokio_util::sync::CancellationToken; +use tracing::info; #[tokio::main] async fn main() -> Result<()> { let args = Args::parse(); - println!("Fetching from {}", args.remote_store_url); + // Enable tracing, configured by environment variables. + let _guard = telemetry_subscribers::TelemetryConfig::new() + .with_env() + .init(); - let client = IngestionClient::new(args.remote_store_url)?; + let cancel = CancellationToken::new(); + let (metrics, metrics_service) = MetricsService::new(args.metrics_address, cancel.clone())?; + + let metrics_handle = metrics_service + .run() + .await + .context("Failed to start metrics service")?; + + info!("Fetching {}", args.remote_store_url); + + let client = IngestionClient::new(args.remote_store_url, metrics.clone())?; let checkpoint = client.fetch(args.start).await?; - println!( - "Fetch checkpoint {cp}:\n\ - #transactions = {txs}\n\ - #events = {evs}\n\ - #input-objects = {ins}\n\ - #output-objects = {outs}", - cp = args.start, + + info!( txs = checkpoint.transactions.len(), evs = checkpoint .transactions @@ -37,7 +48,13 @@ async fn main() -> Result<()> { .iter() .map(|tx| tx.output_objects.len()) .sum::(), + "Fetch checkpoint {}", + args.start, ); + // Once we receive a Ctrl-C, notify all services to shutdown, and wait for them to finish. + signal::ctrl_c().await.unwrap(); + cancel.cancel(); + metrics_handle.await.unwrap(); Ok(()) } diff --git a/crates/sui-indexer-alt/src/metrics.rs b/crates/sui-indexer-alt/src/metrics.rs new file mode 100644 index 0000000000000..c1a27fdb726ae --- /dev/null +++ b/crates/sui-indexer-alt/src/metrics.rs @@ -0,0 +1,152 @@ +// Copyright (c) Mysten Labs, Inc. +// SPDX-License-Identifier: Apache-2.0 + +use std::net::SocketAddr; + +use anyhow::Result; +use axum::{extract::Extension, routing::get, Router}; +use mysten_metrics::RegistryService; +use prometheus::{ + register_histogram_with_registry, register_int_counter_with_registry, Histogram, IntCounter, + Registry, +}; +use tokio::{net::TcpListener, task::JoinHandle}; +use tokio_util::sync::CancellationToken; +use tracing::info; + +/// Histogram buckets for the distribution of checkpoint fetching latencies. +const INGESTION_LATENCY_SEC_BUCKETS: &[f64] = &[ + 0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0, 20.0, 50.0, 100.0, +]; + +/// Service to expose prometheus metrics from the indexer. +pub struct MetricsService { + addr: SocketAddr, + service: RegistryService, + cancel: CancellationToken, +} + +#[derive(Clone)] +pub struct IndexerMetrics { + // Statistics related to fetching data from the remote store. + pub total_ingested_checkpoints: IntCounter, + pub total_ingested_transactions: IntCounter, + pub total_ingested_events: IntCounter, + pub total_ingested_inputs: IntCounter, + pub total_ingested_outputs: IntCounter, + pub total_ingested_bytes: IntCounter, + pub total_ingested_retries: IntCounter, + + // Distribution of times taken to fetch data from the remote store, including time taken on + // retries. + pub ingested_checkpoint_latency: Histogram, +} + +impl MetricsService { + /// Create a new metrics service, exposing Mysten-wide metrics, and Indexer-specific metrics. 
+ /// Returns the Indexer-specific metrics and the service itself (which must be run with + /// [Self::run]). + pub fn new( + addr: SocketAddr, + cancel: CancellationToken, + ) -> Result<(IndexerMetrics, MetricsService)> { + let registry = Registry::new_custom(Some("indexer_alt".to_string()), None)?; + let metrics = IndexerMetrics::new(®istry); + mysten_metrics::init_metrics(®istry); + + let service = Self { + addr, + service: RegistryService::new(registry), + cancel, + }; + + Ok((metrics, service)) + } + + /// Start the service. The service will run until the cancellation token is triggered. + pub async fn run(self) -> Result> { + let listener = TcpListener::bind(&self.addr).await?; + let app = Router::new() + .route("/metrics", get(mysten_metrics::metrics)) + .layer(Extension(self.service)); + + Ok(tokio::spawn(async move { + info!("Starting metrics service on {}", self.addr); + axum::serve(listener, app) + .with_graceful_shutdown(async move { + self.cancel.cancelled().await; + info!("Shutdown received, stopping metrics service."); + }) + .await + .unwrap(); + })) + } +} + +impl IndexerMetrics { + pub fn new(registry: &Registry) -> Self { + Self { + total_ingested_checkpoints: register_int_counter_with_registry!( + "indexer_total_ingested_checkpoints", + "Total number of checkpoints fetched from the remote store", + registry, + ) + .unwrap(), + total_ingested_transactions: register_int_counter_with_registry!( + "indexer_total_ingested_transactions", + "Total number of transactions fetched from the remote store", + registry, + ) + .unwrap(), + total_ingested_events: register_int_counter_with_registry!( + "indexer_total_ingested_events", + "Total number of events fetched from the remote store", + registry, + ) + .unwrap(), + total_ingested_inputs: register_int_counter_with_registry!( + "indexer_total_ingested_inputs", + "Total number of input objects fetched from the remote store", + registry, + ) + .unwrap(), + total_ingested_outputs: register_int_counter_with_registry!( + "indexer_total_ingested_outputs", + "Total number of output objects fetched from the remote store", + registry, + ) + .unwrap(), + total_ingested_bytes: register_int_counter_with_registry!( + "indexer_total_ingested_bytes", + "Total number of bytes fetched from the remote store", + registry, + ) + .unwrap(), + total_ingested_retries: register_int_counter_with_registry!( + "indexer_total_ingested_retries", + "Total number of retries fetching data from the remote store", + registry, + ) + .unwrap(), + ingested_checkpoint_latency: register_histogram_with_registry!( + "indexer_ingested_checkpoint_latency", + "Time taken to fetch a checkpoint from the remote store, including retries", + INGESTION_LATENCY_SEC_BUCKETS.to_vec(), + registry, + ) + .unwrap(), + } + } +} + +#[cfg(test)] +pub(crate) mod tests { + use prometheus::Registry; + + use super::IndexerMetrics; + + /// Construct metrics for test purposes. + pub fn test_metrics() -> IndexerMetrics { + IndexerMetrics::new(&Registry::new()) + } +} From 7405f903fad27881ea2d92ea2a26833ca64a93d1 Mon Sep 17 00:00:00 2001 From: Ashok Menon Date: Sun, 13 Oct 2024 22:54:55 +0100 Subject: [PATCH 06/33] indexer-alt: ingestion service ## Description A service the fetches checkpoints and pushes them down to subscribers to process. ## Test plan Created a dummy ingester that prints the checkpoint sequence number and digest to test throughput. This was run for 100s on my local machine at two different points in mainnet's history, around checkpoint 9M, and checkpoint 64M. 
The former is around the time of Sui8192 and when on-chain checkpoint rate was around 1 per second, and the latter is from the FOMO mint when checkpoint rate on-chain was around 5 per second: ``` sui$ cargo run -p sui-indexer-alt --release -- \ --remote-store-url https://checkpoints.mainnet.sui.io \ --start-checkpoint $CP ``` Locally, this simple ingester was limited in both cases by bandwidth (around 100Mbps), which translated to a checkpoint rates of roughly 70 and 160 per second at checkpoints 9M and 64M respectively. In a follow-up PR, I introduce an equivalent dummy ingester to the existing data ingestion framework, and it reached rates of around 50 and 130 per second. It's possible that this is still due to bandwidth, because the existing framework can end up dropping a partially downloaded checkpoint if it times out (and has quite an aggressive timeout). --- Cargo.lock | 1 + crates/sui-indexer-alt/Cargo.toml | 1 + crates/sui-indexer-alt/src/args.rs | 11 +- crates/sui-indexer-alt/src/ingestion/mod.rs | 234 ++++++++++++++++++-- crates/sui-indexer-alt/src/main.rs | 60 ++--- crates/sui-indexer-alt/src/metrics.rs | 15 +- 6 files changed, 260 insertions(+), 62 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 68b19a75c46f7..6dedd6ec1ecdf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13885,6 +13885,7 @@ dependencies = [ "backoff", "bcs", "clap", + "futures", "mysten-metrics", "prometheus", "reqwest 0.12.5", diff --git a/crates/sui-indexer-alt/Cargo.toml b/crates/sui-indexer-alt/Cargo.toml index 12a08ec2ee0a7..88fcf64e061f1 100644 --- a/crates/sui-indexer-alt/Cargo.toml +++ b/crates/sui-indexer-alt/Cargo.toml @@ -16,6 +16,7 @@ axum.workspace = true backoff.workspace = true bcs.workspace = true clap.workspace = true +futures.workspace = true prometheus.workspace = true reqwest.workspace = true serde.workspace = true diff --git a/crates/sui-indexer-alt/src/args.rs b/crates/sui-indexer-alt/src/args.rs index 282bb847c25f3..e288af455d67c 100644 --- a/crates/sui-indexer-alt/src/args.rs +++ b/crates/sui-indexer-alt/src/args.rs @@ -3,17 +3,12 @@ use std::net::SocketAddr; -use url::Url; +use crate::ingestion::IngestionConfig; #[derive(clap::Parser, Debug, Clone)] pub struct Args { - /// First checkpoint to start indexing from. - #[arg(long, default_value_t = 0)] - pub start: u64, - - /// Remote Store to fetch CheckpointData from. - #[arg(long)] - pub remote_store_url: Url, + #[command(flatten)] + pub ingestion: IngestionConfig, /// Address to serve Prometheus Metrics from. #[arg(long, default_value = "0.0.0.0:9184")] diff --git a/crates/sui-indexer-alt/src/ingestion/mod.rs b/crates/sui-indexer-alt/src/ingestion/mod.rs index f9e7059a4255d..42b7ff2d8baa4 100644 --- a/crates/sui-indexer-alt/src/ingestion/mod.rs +++ b/crates/sui-indexer-alt/src/ingestion/mod.rs @@ -3,21 +3,64 @@ use std::{sync::Arc, time::Duration}; -use backoff::ExponentialBackoff; +use backoff::{backoff::Constant, ExponentialBackoff}; +use futures::{future::try_join_all, stream, StreamExt, TryStreamExt}; +use mysten_metrics::spawn_monitored_task; use reqwest::{Client, StatusCode}; use sui_storage::blob::Blob; use sui_types::full_checkpoint_content::CheckpointData; +use tokio::{sync::mpsc, task::JoinHandle}; +use tokio_util::sync::CancellationToken; +use tracing::{debug, error, info}; use url::Url; use crate::metrics::IndexerMetrics; /// Wait at most this long between retries for transient errors. 
-const MAX_RETRY_INTERVAL: Duration = Duration::from_secs(60); +const MAX_TRANSIENT_RETRY_INTERVAL: Duration = Duration::from_secs(60); + +pub struct IngestionService { + config: IngestionConfig, + client: IngestionClient, + metrics: Arc, + subscribers: Vec>>, + cancel: CancellationToken, +} -pub struct IngestionClient { +#[derive(Clone)] +struct IngestionClient { url: Url, client: Client, - metrics: IndexerMetrics, + /// Wrap the metrics in an `Arc` to keep copies of the client cheap. + metrics: Arc, +} + +#[derive(clap::Args, Debug, Clone)] +pub struct IngestionConfig { + /// Remote Store to fetch checkpoints from. + #[arg(long)] + remote_store_url: Url, + + /// First checkpoint to start ingestion from. + #[arg(long, default_value_t = 0)] + start_checkpoint: u64, + + /// Maximum size of checkpoint backlog across all workers downstream of the ingestion service. + #[arg(long, default_value_t = 5000)] + buffer_size: usize, + + /// Maximum number of checkpoint to attempt to fetch concurrently. + #[arg(long, default_value_t = 200)] + concurrency: usize, + + /// Polling interval to retry fetching checkpoints that do not exist. + #[arg( + long, + default_value = "200", + value_name = "MILLISECONDS", + value_parser = |s: &str| s.parse().map(Duration::from_millis) + )] + retry_interval: Duration, } type Result = std::result::Result; @@ -35,10 +78,142 @@ pub enum Error { #[error(transparent)] ReqwestError(#[from] reqwest::Error), + + #[error("No subscribers for ingestion service")] + NoSubscribers, +} + +impl IngestionService { + pub fn new( + config: IngestionConfig, + metrics: IndexerMetrics, + cancel: CancellationToken, + ) -> Result { + let metrics = Arc::new(metrics); + Ok(Self { + client: IngestionClient::new(config.remote_store_url.clone(), metrics.clone())?, + subscribers: Vec::new(), + config, + metrics, + cancel, + }) + } + + /// Add a new subscription to the ingestion service. Note that the service is susceptible to + /// the "slow receiver" problem: If one receiver is slower to process checkpoints than the + /// checkpoint ingestion rate, it will eventually hold up all receivers. + pub fn subscribe(&mut self) -> mpsc::Receiver> { + let (sender, receiver) = mpsc::channel(self.config.buffer_size); + self.subscribers.push(sender); + receiver + } + + /// Start the ingestion service as a background task, consuming it in the process. + /// + /// Checkpoints are fetched concurrently starting with the configured `start_checkpoint`, and + /// pushed to subscribers' channels (potentially out-of-order). Subscribers can communicate + /// with the ingestion service via their channels in the following ways: + /// + /// - If a subscriber is lagging (not receiving checkpoints fast enough), it will eventually + /// provide back-pressure to the ingestion service, which will stop fetching new checkpoints. + /// - If a subscriber closes its channel, the ingestion service will intepret that as a signal + /// to shutdown as well. + /// + /// If ingestion reaches the leading edge of the network, it will encounter checkpoints that do + /// not exist yet. These will be retried repeatedly on a fixed `retry_interval` until they + /// become available. + pub async fn run(self) -> Result> { + let IngestionService { + config, + client, + metrics, + subscribers, + cancel, + } = self; + + /// Individual iterations of `try_for_each_concurrent` communicate with the supervisor by + /// returning an `Err` with a `Break` variant. 
+ enum Break { + Cancelled, + Err(u64, Error), + } + + if subscribers.is_empty() { + return Err(Error::NoSubscribers); + } + + Ok(spawn_monitored_task!(async move { + let start = config.start_checkpoint; + info!(start, "Starting ingestion service"); + + match stream::iter(start..) + .map(Ok) + .try_for_each_concurrent(/* limit */ config.concurrency, |cp| { + let client = client.clone(); + let metrics = metrics.clone(); + let subscribers = subscribers.clone(); + + // One clone is for the supervisor to signal a cancel if it detects a + // subscriber that wants to wind down ingestion, and the other is to pass to + // each worker to detect cancellation. + let supervisor_cancel = cancel.clone(); + let cancel = cancel.clone(); + + // Repeatedly retry if the checkpoint is not found, assuming that we are at the + // tip of the network and it will become available soon. + let backoff = Constant::new(config.retry_interval); + let fetch = move || { + let client = client.clone(); + let metrics = metrics.clone(); + let cancel = cancel.clone(); + + async move { + use backoff::Error as BE; + if cancel.is_cancelled() { + return Err(BE::permanent(Break::Cancelled)); + } + + client.fetch(cp).await.map_err(|e| match e { + Error::NotFound(checkpoint) => { + debug!(checkpoint, "Checkpoint not found, retrying..."); + metrics.total_ingested_not_found_retries.inc(); + BE::transient(Break::Err(cp, e)) + } + e => BE::permanent(Break::Err(cp, e)), + }) + } + }; + + async move { + let checkpoint = backoff::future::retry(backoff, fetch).await?; + let futures = subscribers.iter().map(|s| s.send(checkpoint.clone())); + + if try_join_all(futures).await.is_err() { + info!("Subscription dropped, signalling shutdown"); + supervisor_cancel.cancel(); + Err(Break::Cancelled) + } else { + Ok(()) + } + } + }) + .await + { + Ok(()) => {} + Err(Break::Cancelled) => { + info!("Shutdown received, stopping ingestion service"); + } + + Err(Break::Err(checkpoint, e)) => { + error!(checkpoint, "Ingestion service failed: {}", e); + } + } + })) + } } impl IngestionClient { - pub fn new(url: Url, metrics: IndexerMetrics) -> Result { + fn new(url: Url, metrics: Arc) -> Result { Ok(Self { url, client: Client::builder().build()?, @@ -49,7 +224,7 @@ impl IngestionClient { /// Fetch a checkpoint from the remote store. Repeatedly retries transient errors with an /// exponential backoff (up to [MAX_RETRY_INTERVAL]), but will immediately return /// non-transient errors, which include all client errors, except timeouts and rate limiting. - pub async fn fetch(&self, checkpoint: u64) -> Result> { + async fn fetch(&self, checkpoint: u64) -> Result> { // SAFETY: The path being joined is statically known to be valid. let url = self .url @@ -71,82 +246,95 @@ impl IngestionClient { code if code.is_success() => Ok(response), // Treat 404s as a special case so we can match on this error type. - StatusCode::NOT_FOUND => Err(BE::Permanent(Error::NotFound(checkpoint))), + code @ StatusCode::NOT_FOUND => { + debug!(checkpoint, %code, "Checkpoint not found"); + Err(BE::permanent(Error::NotFound(checkpoint))) + } // Timeouts are a client error but they are usually transient. code @ StatusCode::REQUEST_TIMEOUT => { - self.metrics.total_ingested_retries.inc(); + debug!(checkpoint, %code, "Transient error, retrying..."); + self.metrics.total_ingested_transient_retries.inc(); Err(BE::transient(Error::HttpError(checkpoint, code))) } // Rate limiting is also a client error, but the backoff will eventually widen the // interval appropriately. 
code @ StatusCode::TOO_MANY_REQUESTS => { - self.metrics.total_ingested_retries.inc(); + debug!(checkpoint, %code, "Transient error, retrying..."); + self.metrics.total_ingested_transient_retries.inc(); Err(BE::transient(Error::HttpError(checkpoint, code))) } // Assume that if the server is facing difficulties, it will recover eventually. code if code.is_server_error() => { - self.metrics.total_ingested_retries.inc(); + debug!(checkpoint, %code, "Transient error, retrying..."); + self.metrics.total_ingested_transient_retries.inc(); Err(BE::transient(Error::HttpError(checkpoint, code))) } // For everything else, assume it's a permanent error and don't retry. - code => Err(BE::Permanent(Error::HttpError(checkpoint, code))), + code => { + debug!(checkpoint, %code, "Permanent error, giving up!"); + Err(BE::permanent(Error::HttpError(checkpoint, code))) + } } } }; // Keep backing off until we are waiting for the max interval, but don't give up. let backoff = ExponentialBackoff { - max_interval: MAX_RETRY_INTERVAL, + max_interval: MAX_TRANSIENT_RETRY_INTERVAL, max_elapsed_time: None, ..Default::default() }; - let _guard = self.metrics.ingested_checkpoint_latency.start_timer(); + let guard = self.metrics.ingested_checkpoint_latency.start_timer(); let bytes = backoff::future::retry(backoff, request) .await? .bytes() .await?; - let checkpoint: CheckpointData = + let data: CheckpointData = Blob::from_bytes(&bytes).map_err(|e| Error::DeserializationError(checkpoint, e))?; + let elapsed = guard.stop_and_record(); + debug!( + checkpoint, + "Fetched checkpoint in {:.03}ms", + elapsed * 1000.0 + ); + self.metrics.total_ingested_checkpoints.inc(); self.metrics.total_ingested_bytes.inc_by(bytes.len() as u64); self.metrics .total_ingested_transactions - .inc_by(checkpoint.transactions.len() as u64); + .inc_by(data.transactions.len() as u64); self.metrics.total_ingested_events.inc_by( - checkpoint - .transactions + data.transactions .iter() .map(|tx| tx.events.as_ref().map_or(0, |evs| evs.data.len()) as u64) .sum(), ); self.metrics.total_ingested_inputs.inc_by( - checkpoint - .transactions + data.transactions .iter() .map(|tx| tx.input_objects.len() as u64) .sum(), ); self.metrics.total_ingested_outputs.inc_by( - checkpoint - .transactions + data.transactions .iter() .map(|tx| tx.output_objects.len() as u64) .sum(), ); - Ok(Arc::new(checkpoint)) + Ok(Arc::new(data)) } } @@ -176,7 +364,7 @@ mod tests { } fn test_client(uri: String) -> IngestionClient { - IngestionClient::new(Url::parse(&uri).unwrap(), test_metrics()).unwrap() + IngestionClient::new(Url::parse(&uri).unwrap(), Arc::new(test_metrics())).unwrap() } #[tokio::test] diff --git a/crates/sui-indexer-alt/src/main.rs b/crates/sui-indexer-alt/src/main.rs index 54b186d62547e..a9b8e585fc5b3 100644 --- a/crates/sui-indexer-alt/src/main.rs +++ b/crates/sui-indexer-alt/src/main.rs @@ -3,8 +3,9 @@ use anyhow::{Context, Result}; use clap::Parser; -use sui_indexer_alt::{args::Args, ingestion::IngestionClient, metrics::MetricsService}; -use tokio::signal; +use mysten_metrics::spawn_monitored_task; +use sui_indexer_alt::{args::Args, ingestion::IngestionService, metrics::MetricsService}; +use tokio::{signal, task::JoinHandle}; use tokio_util::sync::CancellationToken; use tracing::info; @@ -20,41 +21,46 @@ async fn main() -> Result<()> { let cancel = CancellationToken::new(); let (metrics, metrics_service) = MetricsService::new(args.metrics_address, cancel.clone())?; + let mut ingestion_service = + IngestionService::new(args.ingestion, metrics.clone(), 
cancel.clone())?; let metrics_handle = metrics_service .run() .await .context("Failed to start metrics service")?; - info!("Fetching {}", args.remote_store_url); - - let client = IngestionClient::new(args.remote_store_url, metrics.clone())?; - let checkpoint = client.fetch(args.start).await?; - - info!( - txs = checkpoint.transactions.len(), - evs = checkpoint - .transactions - .iter() - .map(|tx| tx.events.as_ref().map_or(0, |evs| evs.data.len())) - .sum::(), - ins = checkpoint - .transactions - .iter() - .map(|tx| tx.input_objects.len()) - .sum::(), - outs = checkpoint - .transactions - .iter() - .map(|tx| tx.output_objects.len()) - .sum::(), - "Fetch checkpoint {}", - args.start, - ); + let ingester_handle = digest_ingester(&mut ingestion_service, cancel.clone()); + + let ingestion_handle = ingestion_service + .run() + .await + .context("Failed to start ingestion service")?; // Once we receive a Ctrl-C, notify all services to shutdown, and wait for them to finish. signal::ctrl_c().await.unwrap(); cancel.cancel(); + ingester_handle.await.unwrap(); metrics_handle.await.unwrap(); + ingestion_handle.await.unwrap(); + Ok(()) } + +/// Test ingester which logs the digests of checkpoints it receives. +fn digest_ingester(ingestion: &mut IngestionService, cancel: CancellationToken) -> JoinHandle<()> { + let mut rx = ingestion.subscribe(); + spawn_monitored_task!(async move { + info!("Starting checkpoint digest ingester"); + loop { + tokio::select! { + _ = cancel.cancelled() => break, + Some(checkpoint) = rx.recv() => { + let cp = checkpoint.checkpoint_summary.sequence_number; + let digest = checkpoint.checkpoint_summary.content_digest; + info!("{cp}: {digest}"); + } + } + } + info!("Shutdown received, stopping digest ingester"); + }) +} diff --git a/crates/sui-indexer-alt/src/metrics.rs b/crates/sui-indexer-alt/src/metrics.rs index c1a27fdb726ae..4d89444b32126 100644 --- a/crates/sui-indexer-alt/src/metrics.rs +++ b/crates/sui-indexer-alt/src/metrics.rs @@ -35,7 +35,8 @@ pub struct IndexerMetrics { pub total_ingested_inputs: IntCounter, pub total_ingested_outputs: IntCounter, pub total_ingested_bytes: IntCounter, - pub total_ingested_retries: IntCounter, + pub total_ingested_transient_retries: IntCounter, + pub total_ingested_not_found_retries: IntCounter, // Distribution of times taken to fetch data from the remote store, including time taken on // retries. 
@@ -75,7 +76,7 @@ impl MetricsService { axum::serve(listener, app) .with_graceful_shutdown(async move { self.cancel.cancelled().await; - info!("Shutdown received, stopping metrics service."); + info!("Shutdown received, stopping metrics service"); }) .await .unwrap(); @@ -122,9 +123,15 @@ impl IndexerMetrics { registry, ) .unwrap(), - total_ingested_retries: register_int_counter_with_registry!( + total_ingested_transient_retries: register_int_counter_with_registry!( "indexer_total_ingested_retries", - "Total number of retries fetching data from the remote store", + "Total number of retries due to transient errors while fetching data from the remote store", + registry, + ) + .unwrap(), + total_ingested_not_found_retries: register_int_counter_with_registry!( + "indexer_total_ingested_not_found_retries", + "Total number of retries due to not-found errors while fetching data from the remote store", registry, ) .unwrap(), From dc9a6dcda8a045a2699d65d4aad82ea8790b533e Mon Sep 17 00:00:00 2001 From: Ashok Menon Date: Sun, 13 Oct 2024 23:07:30 +0100 Subject: [PATCH 07/33] indexer: dummy backfill, printing digests ## Description Creating an ingestion backfill that just prints the checkpoint digest, for comparison with the dummy ingester in `sui-indexer-alt`. ## Test plan See previous commit for details -- this backfill is run with: ``` sui$ cargo run --bin sui-indexer --release -- \ --database-url postgres://postgres:postgespw@localhost:5432/sui_indexer \ run-back-fill $CP 70000000 \ ingestion digest https://checkpoints.mainnet.sui.io ``` --- .../ingestion_backfills/digest_task.rs | 26 +++++++++++++++++++ .../ingestion_backfills/mod.rs | 1 + .../src/backfill/backfill_instances/mod.rs | 8 ++++++ crates/sui-indexer/src/backfill/mod.rs | 1 + 4 files changed, 36 insertions(+) create mode 100644 crates/sui-indexer/src/backfill/backfill_instances/ingestion_backfills/digest_task.rs diff --git a/crates/sui-indexer/src/backfill/backfill_instances/ingestion_backfills/digest_task.rs b/crates/sui-indexer/src/backfill/backfill_instances/ingestion_backfills/digest_task.rs new file mode 100644 index 0000000000000..8273bcdaa3b7b --- /dev/null +++ b/crates/sui-indexer/src/backfill/backfill_instances/ingestion_backfills/digest_task.rs @@ -0,0 +1,26 @@ +// Copyright (c) Mysten Labs, Inc. +// SPDX-License-Identifier: Apache-2.0 + +use crate::backfill::backfill_instances::ingestion_backfills::IngestionBackfillTrait; +use crate::database::ConnectionPool; +use sui_types::full_checkpoint_content::CheckpointData; +use tracing::info; + +/// Dummy backfill that only prints the sequence number and content digest of the checkpoint. Intended to +/// benchmark backfill performance.
+pub struct DigestBackfill; + +#[async_trait::async_trait] +impl IngestionBackfillTrait for DigestBackfill { + type ProcessedType = (); + + fn process_checkpoint(checkpoint: &CheckpointData) -> Vec { + let cp = checkpoint.checkpoint_summary.sequence_number; + let digest = checkpoint.checkpoint_summary.content_digest; + info!("{cp}: {digest}"); + + vec![] + } + + async fn commit_chunk(_pool: ConnectionPool, _processed_data: Vec) {} +} diff --git a/crates/sui-indexer/src/backfill/backfill_instances/ingestion_backfills/mod.rs b/crates/sui-indexer/src/backfill/backfill_instances/ingestion_backfills/mod.rs index 17bbc29d7dc5c..935ba5562bd9c 100644 --- a/crates/sui-indexer/src/backfill/backfill_instances/ingestion_backfills/mod.rs +++ b/crates/sui-indexer/src/backfill/backfill_instances/ingestion_backfills/mod.rs @@ -1,6 +1,7 @@ // Copyright (c) Mysten Labs, Inc. // SPDX-License-Identifier: Apache-2.0 +pub(crate) mod digest_task; pub(crate) mod ingestion_backfill_task; pub(crate) mod raw_checkpoints; pub(crate) mod tx_affected_objects; diff --git a/crates/sui-indexer/src/backfill/backfill_instances/mod.rs b/crates/sui-indexer/src/backfill/backfill_instances/mod.rs index 27c96dd6c9234..304ed4e715e1d 100644 --- a/crates/sui-indexer/src/backfill/backfill_instances/mod.rs +++ b/crates/sui-indexer/src/backfill/backfill_instances/mod.rs @@ -1,6 +1,7 @@ // Copyright (c) Mysten Labs, Inc. // SPDX-License-Identifier: Apache-2.0 +use crate::backfill::backfill_instances::ingestion_backfills::digest_task::DigestBackfill; use crate::backfill::backfill_instances::ingestion_backfills::ingestion_backfill_task::IngestionBackfillTask; use crate::backfill::backfill_instances::ingestion_backfills::raw_checkpoints::RawCheckpointsBackFill; use crate::backfill::backfill_instances::ingestion_backfills::tx_affected_objects::TxAffectedObjectsBackfill; @@ -28,6 +29,13 @@ pub async fn get_backfill_task( kind, remote_store_url, } => match kind { + IngestionBackfillKind::Digest => Arc::new( + IngestionBackfillTask::::new( + remote_store_url, + range_start as CheckpointSequenceNumber, + ) + .await, + ), IngestionBackfillKind::RawCheckpoints => Arc::new( IngestionBackfillTask::::new( remote_store_url, diff --git a/crates/sui-indexer/src/backfill/mod.rs b/crates/sui-indexer/src/backfill/mod.rs index 453d11baeeed2..e17ba40628ef1 100644 --- a/crates/sui-indexer/src/backfill/mod.rs +++ b/crates/sui-indexer/src/backfill/mod.rs @@ -29,6 +29,7 @@ pub enum BackfillTaskKind { #[derive(ValueEnum, Clone, Debug)] pub enum IngestionBackfillKind { + Digest, RawCheckpoints, TxAffectedObjects, } From b24162ad0679abc0296bc65a94617e33d616541e Mon Sep 17 00:00:00 2001 From: Ashok Menon Date: Wed, 16 Oct 2024 22:06:56 +0100 Subject: [PATCH 08/33] chore(indexer-alt): factor out ingestion clients and errors --- .../sui-indexer-alt/src/ingestion/client.rs | 232 +++++++++++++++++ crates/sui-indexer-alt/src/ingestion/error.rs | 24 ++ crates/sui-indexer-alt/src/ingestion/mod.rs | 245 +----------------- 3 files changed, 261 insertions(+), 240 deletions(-) create mode 100644 crates/sui-indexer-alt/src/ingestion/client.rs create mode 100644 crates/sui-indexer-alt/src/ingestion/error.rs diff --git a/crates/sui-indexer-alt/src/ingestion/client.rs b/crates/sui-indexer-alt/src/ingestion/client.rs new file mode 100644 index 0000000000000..48ce6bb1ca505 --- /dev/null +++ b/crates/sui-indexer-alt/src/ingestion/client.rs @@ -0,0 +1,232 @@ +// Copyright (c) Mysten Labs, Inc. 
+// SPDX-License-Identifier: Apache-2.0 + +use std::{sync::Arc, time::Duration}; + +use backoff::ExponentialBackoff; +use reqwest::{Client, StatusCode}; +use sui_storage::blob::Blob; +use sui_types::full_checkpoint_content::CheckpointData; +use tracing::debug; +use url::Url; + +use crate::ingestion::error::{Error, Result}; +use crate::metrics::IndexerMetrics; + +/// Wait at most this long between retries for transient errors. +const MAX_TRANSIENT_RETRY_INTERVAL: Duration = Duration::from_secs(60); + +#[derive(Clone)] +pub(crate) struct IngestionClient { + url: Url, + client: Client, + /// Wrap the metrics in an `Arc` to keep copies of the client cheap. + metrics: Arc, +} + +impl IngestionClient { + pub(crate) fn new(url: Url, metrics: Arc) -> Result { + Ok(Self { + url, + client: Client::builder().build()?, + metrics, + }) + } + + /// Fetch a checkpoint from the remote store. Repeatedly retries transient errors with an + /// exponential backoff (up to [MAX_RETRY_INTERVAL]), but will immediately return + /// non-transient errors, which include all client errors, except timeouts and rate limiting. + pub(crate) async fn fetch(&self, checkpoint: u64) -> Result> { + // SAFETY: The path being joined is statically known to be valid. + let url = self + .url + .join(&format!("/{checkpoint}.chk")) + .expect("Unexpected invalid URL"); + + let request = move || { + let url = url.clone(); + async move { + let response = self + .client + .get(url) + .send() + .await + .expect("Unexpected error building request"); + + use backoff::Error as BE; + match response.status() { + code if code.is_success() => Ok(response), + + // Treat 404s as a special case so we can match on this error type. + code @ StatusCode::NOT_FOUND => { + debug!(checkpoint, %code, "Checkpoint not found"); + Err(BE::permanent(Error::NotFound(checkpoint))) + } + + // Timeouts are a client error but they are usually transient. + code @ StatusCode::REQUEST_TIMEOUT => { + debug!(checkpoint, %code, "Transient error, retrying..."); + self.metrics.total_ingested_transient_retries.inc(); + Err(BE::transient(Error::HttpError(checkpoint, code))) + } + + // Rate limiting is also a client error, but the backoff will eventually widen the + // interval appropriately. + code @ StatusCode::TOO_MANY_REQUESTS => { + debug!(checkpoint, %code, "Transient error, retrying..."); + self.metrics.total_ingested_transient_retries.inc(); + Err(BE::transient(Error::HttpError(checkpoint, code))) + } + + // Assume that if the server is facing difficulties, it will recover eventually. + code if code.is_server_error() => { + debug!(checkpoint, %code, "Transient error, retrying..."); + self.metrics.total_ingested_transient_retries.inc(); + Err(BE::transient(Error::HttpError(checkpoint, code))) + } + + // For everything else, assume it's a permanent error and don't retry. + code => { + debug!(checkpoint, %code, "Permanent error, giving up!"); + Err(BE::permanent(Error::HttpError(checkpoint, code))) + } + } + } + }; + + // Keep backing off until we are waiting for the max interval, but don't give up. + let backoff = ExponentialBackoff { + max_interval: MAX_TRANSIENT_RETRY_INTERVAL, + max_elapsed_time: None, + ..Default::default() + }; + + let guard = self.metrics.ingested_checkpoint_latency.start_timer(); + + let bytes = backoff::future::retry(backoff, request) + .await? 
+ .bytes() + .await?; + + let data: CheckpointData = + Blob::from_bytes(&bytes).map_err(|e| Error::DeserializationError(checkpoint, e))?; + + let elapsed = guard.stop_and_record(); + debug!( + checkpoint, + "Fetched checkpoint in {:.03}ms", + elapsed * 1000.0 + ); + + self.metrics.total_ingested_checkpoints.inc(); + self.metrics.total_ingested_bytes.inc_by(bytes.len() as u64); + + self.metrics + .total_ingested_transactions + .inc_by(data.transactions.len() as u64); + + self.metrics.total_ingested_events.inc_by( + data.transactions + .iter() + .map(|tx| tx.events.as_ref().map_or(0, |evs| evs.data.len()) as u64) + .sum(), + ); + + self.metrics.total_ingested_inputs.inc_by( + data.transactions + .iter() + .map(|tx| tx.input_objects.len() as u64) + .sum(), + ); + + self.metrics.total_ingested_outputs.inc_by( + data.transactions + .iter() + .map(|tx| tx.output_objects.len() as u64) + .sum(), + ); + + Ok(Arc::new(data)) + } +} + +#[cfg(test)] +mod tests { + use std::sync::Mutex; + + use wiremock::{ + matchers::{method, path_regex}, + Mock, MockServer, Request, Respond, ResponseTemplate, + }; + + use crate::metrics::tests::test_metrics; + + use super::*; + + async fn respond_with(server: &MockServer, response: impl Respond + 'static) { + Mock::given(method("GET")) + .and(path_regex(r"/\d+.chk")) + .respond_with(response) + .mount(server) + .await; + } + + fn status(code: StatusCode) -> ResponseTemplate { + ResponseTemplate::new(code.as_u16()) + } + + fn test_client(uri: String) -> IngestionClient { + IngestionClient::new(Url::parse(&uri).unwrap(), Arc::new(test_metrics())).unwrap() + } + + #[tokio::test] + async fn not_found() { + let server = MockServer::start().await; + respond_with(&server, status(StatusCode::NOT_FOUND)).await; + + let client = test_client(server.uri()); + let error = client.fetch(42).await.unwrap_err(); + + assert!(matches!(error, Error::NotFound(42))); + } + + #[tokio::test] + async fn client_error() { + let server = MockServer::start().await; + respond_with(&server, status(StatusCode::IM_A_TEAPOT)).await; + + let client = test_client(server.uri()); + let error = client.fetch(42).await.unwrap_err(); + + assert!(matches!( + error, + Error::HttpError(42, StatusCode::IM_A_TEAPOT) + )); + } + + #[tokio::test] + async fn transient_server_error() { + let server = MockServer::start().await; + + let times: Mutex = Mutex::new(0); + respond_with(&server, move |_: &Request| { + let mut times = times.lock().unwrap(); + *times += 1; + status(match *times { + 1 => StatusCode::INTERNAL_SERVER_ERROR, + 2 => StatusCode::REQUEST_TIMEOUT, + 3 => StatusCode::TOO_MANY_REQUESTS, + _ => StatusCode::IM_A_TEAPOT, + }) + }) + .await; + + let client = test_client(server.uri()); + let error = client.fetch(42).await.unwrap_err(); + + assert!(matches!( + error, + Error::HttpError(42, StatusCode::IM_A_TEAPOT) + )); + } +} diff --git a/crates/sui-indexer-alt/src/ingestion/error.rs b/crates/sui-indexer-alt/src/ingestion/error.rs new file mode 100644 index 0000000000000..eecce9c41c876 --- /dev/null +++ b/crates/sui-indexer-alt/src/ingestion/error.rs @@ -0,0 +1,24 @@ +// Copyright (c) Mysten Labs, Inc. 
+// SPDX-License-Identifier: Apache-2.0 + +use reqwest::StatusCode; + +pub type Result = std::result::Result; + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("Checkpoint {0} not found")] + NotFound(u64), + + #[error("Failed to deserialize checkpoint {0}: {1}")] + DeserializationError(u64, #[source] anyhow::Error), + + #[error("Failed to fetch checkpoint {0}: {1}")] + HttpError(u64, StatusCode), + + #[error(transparent)] + ReqwestError(#[from] reqwest::Error), + + #[error("No subscribers for ingestion service")] + NoSubscribers, +} diff --git a/crates/sui-indexer-alt/src/ingestion/mod.rs b/crates/sui-indexer-alt/src/ingestion/mod.rs index 42b7ff2d8baa4..88afaf9e0e2b5 100644 --- a/crates/sui-indexer-alt/src/ingestion/mod.rs +++ b/crates/sui-indexer-alt/src/ingestion/mod.rs @@ -3,21 +3,21 @@ use std::{sync::Arc, time::Duration}; -use backoff::{backoff::Constant, ExponentialBackoff}; +use backoff::backoff::Constant; +use client::IngestionClient; use futures::{future::try_join_all, stream, StreamExt, TryStreamExt}; use mysten_metrics::spawn_monitored_task; -use reqwest::{Client, StatusCode}; -use sui_storage::blob::Blob; use sui_types::full_checkpoint_content::CheckpointData; use tokio::{sync::mpsc, task::JoinHandle}; use tokio_util::sync::CancellationToken; use tracing::{debug, error, info}; use url::Url; +use crate::ingestion::error::{Error, Result}; use crate::metrics::IndexerMetrics; -/// Wait at most this long between retries for transient errors. -const MAX_TRANSIENT_RETRY_INTERVAL: Duration = Duration::from_secs(60); +mod client; +pub mod error; pub struct IngestionService { config: IngestionConfig, @@ -27,14 +27,6 @@ pub struct IngestionService { cancel: CancellationToken, } -#[derive(Clone)] -struct IngestionClient { - url: Url, - client: Client, - /// Wrap the metrics in an `Arc` to keep copies of the client cheap. - metrics: Arc, -} - #[derive(clap::Args, Debug, Clone)] pub struct IngestionConfig { /// Remote Store to fetch checkpoints from. @@ -63,26 +55,6 @@ pub struct IngestionConfig { retry_interval: Duration, } -type Result = std::result::Result; - -#[derive(thiserror::Error, Debug)] -pub enum Error { - #[error("Checkpoint {0} not found")] - NotFound(u64), - - #[error("Failed to deserialize checkpoint {0}: {1}")] - DeserializationError(u64, #[source] anyhow::Error), - - #[error("Failed to fetch checkpoint {0}: {1}")] - HttpError(u64, StatusCode), - - #[error(transparent)] - ReqwestError(#[from] reqwest::Error), - - #[error("No subscribers for ingestion service")] - NoSubscribers, -} - impl IngestionService { pub fn new( config: IngestionConfig, @@ -211,210 +183,3 @@ impl IngestionService { })) } } - -impl IngestionClient { - fn new(url: Url, metrics: Arc) -> Result { - Ok(Self { - url, - client: Client::builder().build()?, - metrics, - }) - } - - /// Fetch a checkpoint from the remote store. Repeatedly retries transient errors with an - /// exponential backoff (up to [MAX_RETRY_INTERVAL]), but will immediately return - /// non-transient errors, which include all client errors, except timeouts and rate limiting. - async fn fetch(&self, checkpoint: u64) -> Result> { - // SAFETY: The path being joined is statically known to be valid. 
- let url = self - .url - .join(&format!("/{checkpoint}.chk")) - .expect("Unexpected invalid URL"); - - let request = move || { - let url = url.clone(); - async move { - let response = self - .client - .get(url) - .send() - .await - .expect("Unexpected error building request"); - - use backoff::Error as BE; - match response.status() { - code if code.is_success() => Ok(response), - - // Treat 404s as a special case so we can match on this error type. - code @ StatusCode::NOT_FOUND => { - debug!(checkpoint, %code, "Checkpoint not found"); - Err(BE::permanent(Error::NotFound(checkpoint))) - } - - // Timeouts are a client error but they are usually transient. - code @ StatusCode::REQUEST_TIMEOUT => { - debug!(checkpoint, %code, "Transient error, retrying..."); - self.metrics.total_ingested_transient_retries.inc(); - Err(BE::transient(Error::HttpError(checkpoint, code))) - } - - // Rate limiting is also a client error, but the backoff will eventually widen the - // interval appropriately. - code @ StatusCode::TOO_MANY_REQUESTS => { - debug!(checkpoint, %code, "Transient error, retrying..."); - self.metrics.total_ingested_transient_retries.inc(); - Err(BE::transient(Error::HttpError(checkpoint, code))) - } - - // Assume that if the server is facing difficulties, it will recover eventually. - code if code.is_server_error() => { - debug!(checkpoint, %code, "Transient error, retrying..."); - self.metrics.total_ingested_transient_retries.inc(); - Err(BE::transient(Error::HttpError(checkpoint, code))) - } - - // For everything else, assume it's a permanent error and don't retry. - code => { - debug!(checkpoint, %code, "Permanent error, giving up!"); - Err(BE::permanent(Error::HttpError(checkpoint, code))) - } - } - } - }; - - // Keep backing off until we are waiting for the max interval, but don't give up. - let backoff = ExponentialBackoff { - max_interval: MAX_TRANSIENT_RETRY_INTERVAL, - max_elapsed_time: None, - ..Default::default() - }; - - let guard = self.metrics.ingested_checkpoint_latency.start_timer(); - - let bytes = backoff::future::retry(backoff, request) - .await? 
- .bytes() - .await?; - - let data: CheckpointData = - Blob::from_bytes(&bytes).map_err(|e| Error::DeserializationError(checkpoint, e))?; - - let elapsed = guard.stop_and_record(); - debug!( - checkpoint, - "Fetched checkpoint in {:.03}ms", - elapsed * 1000.0 - ); - - self.metrics.total_ingested_checkpoints.inc(); - self.metrics.total_ingested_bytes.inc_by(bytes.len() as u64); - - self.metrics - .total_ingested_transactions - .inc_by(data.transactions.len() as u64); - - self.metrics.total_ingested_events.inc_by( - data.transactions - .iter() - .map(|tx| tx.events.as_ref().map_or(0, |evs| evs.data.len()) as u64) - .sum(), - ); - - self.metrics.total_ingested_inputs.inc_by( - data.transactions - .iter() - .map(|tx| tx.input_objects.len() as u64) - .sum(), - ); - - self.metrics.total_ingested_outputs.inc_by( - data.transactions - .iter() - .map(|tx| tx.output_objects.len() as u64) - .sum(), - ); - - Ok(Arc::new(data)) - } -} - -#[cfg(test)] -mod tests { - use std::sync::Mutex; - - use wiremock::{ - matchers::{method, path_regex}, - Mock, MockServer, Request, Respond, ResponseTemplate, - }; - - use crate::metrics::tests::test_metrics; - - use super::*; - - async fn respond_with(server: &MockServer, response: impl Respond + 'static) { - Mock::given(method("GET")) - .and(path_regex(r"/\d+.chk")) - .respond_with(response) - .mount(server) - .await; - } - - fn status(code: StatusCode) -> ResponseTemplate { - ResponseTemplate::new(code.as_u16()) - } - - fn test_client(uri: String) -> IngestionClient { - IngestionClient::new(Url::parse(&uri).unwrap(), Arc::new(test_metrics())).unwrap() - } - - #[tokio::test] - async fn not_found() { - let server = MockServer::start().await; - respond_with(&server, status(StatusCode::NOT_FOUND)).await; - - let client = test_client(server.uri()); - let error = client.fetch(42).await.unwrap_err(); - - assert!(matches!(error, Error::NotFound(42))); - } - - #[tokio::test] - async fn client_error() { - let server = MockServer::start().await; - respond_with(&server, status(StatusCode::IM_A_TEAPOT)).await; - - let client = test_client(server.uri()); - let error = client.fetch(42).await.unwrap_err(); - - assert!(matches!( - error, - Error::HttpError(42, StatusCode::IM_A_TEAPOT) - )); - } - - #[tokio::test] - async fn transient_server_error() { - let server = MockServer::start().await; - - let times: Mutex = Mutex::new(0); - respond_with(&server, move |_: &Request| { - let mut times = times.lock().unwrap(); - *times += 1; - status(match *times { - 1 => StatusCode::INTERNAL_SERVER_ERROR, - 2 => StatusCode::REQUEST_TIMEOUT, - 3 => StatusCode::TOO_MANY_REQUESTS, - _ => StatusCode::IM_A_TEAPOT, - }) - }) - .await; - - let client = test_client(server.uri()); - let error = client.fetch(42).await.unwrap_err(); - - assert!(matches!( - error, - Error::HttpError(42, StatusCode::IM_A_TEAPOT) - )); - } -} From 7c51fa053d7cd37e04aea89e62140cca7590ff33 Mon Sep 17 00:00:00 2001 From: Ashok Menon Date: Mon, 14 Oct 2024 01:39:51 +0100 Subject: [PATCH 09/33] indexer-alt: testing ingestion service ## Description Adding tests fo the ingestion service. In particular, how it handles various error scenarios, back-pressure, buffering, etc. 
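The back-pressure these tests lean on is just bounded `mpsc` channels: each subscriber gets a buffer of `buffer_size` checkpoints, and a full buffer stalls ingestion until that subscriber drains something. A free-standing illustration of that property (plain `tokio`, not indexer code):

```
use tokio::sync::mpsc;

/// With a bounded channel of size 3, the producer can run at most 3 items
/// ahead of a lagging consumer; a full buffer rejects further sends until the
/// consumer drains an item.
#[tokio::main]
async fn main() {
    let (tx, mut rx) = mpsc::channel::<u64>(3);

    for cp in 1..=3 {
        tx.try_send(cp).expect("buffer has room");
    }
    assert!(tx.try_send(4).is_err()); // the producer is now stuck on the laggard

    assert_eq!(rx.recv().await, Some(1)); // the laggard catches up by one...
    tx.try_send(4).expect("...which frees a slot");
}
```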
## Test plan ``` sui$ cargo nextest run -p sui-indexer-alt ``` --- Cargo.lock | 1 + crates/sui-indexer-alt/Cargo.toml | 3 + .../sui-indexer-alt/src/ingestion/client.rs | 66 ++++- crates/sui-indexer-alt/src/ingestion/mod.rs | 255 ++++++++++++++++++ 4 files changed, 319 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6dedd6ec1ecdf..b2dae713e01de 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13888,6 +13888,7 @@ dependencies = [ "futures", "mysten-metrics", "prometheus", + "rand 0.8.5", "reqwest 0.12.5", "serde", "sui-storage", diff --git a/crates/sui-indexer-alt/Cargo.toml b/crates/sui-indexer-alt/Cargo.toml index 88fcf64e061f1..b84e06cd066db 100644 --- a/crates/sui-indexer-alt/Cargo.toml +++ b/crates/sui-indexer-alt/Cargo.toml @@ -32,4 +32,7 @@ sui-storage.workspace = true sui-types.workspace = true [dev-dependencies] +rand.workspace = true wiremock.workspace = true + +sui-types = { workspace = true, features = ["test-utils"] } diff --git a/crates/sui-indexer-alt/src/ingestion/client.rs b/crates/sui-indexer-alt/src/ingestion/client.rs index 48ce6bb1ca505..b6f224f6b986b 100644 --- a/crates/sui-indexer-alt/src/ingestion/client.rs +++ b/crates/sui-indexer-alt/src/ingestion/client.rs @@ -151,9 +151,21 @@ impl IngestionClient { } #[cfg(test)] -mod tests { +pub(crate) mod tests { use std::sync::Mutex; + use rand::{rngs::StdRng, SeedableRng}; + use sui_storage::blob::BlobEncoding; + use sui_types::{ + crypto::KeypairTraits, + gas::GasCostSummary, + messages_checkpoint::{ + CertifiedCheckpointSummary, CheckpointContents, CheckpointSummary, + SignedCheckpointSummary, + }, + supported_protocol_versions::ProtocolConfig, + utils::make_committee_key, + }; use wiremock::{ matchers::{method, path_regex}, Mock, MockServer, Request, Respond, ResponseTemplate, @@ -163,7 +175,12 @@ mod tests { use super::*; - async fn respond_with(server: &MockServer, response: impl Respond + 'static) { + const RNG_SEED: [u8; 32] = [ + 21, 23, 199, 200, 234, 250, 252, 178, 94, 15, 202, 178, 62, 186, 88, 137, 233, 192, 130, + 157, 179, 179, 65, 9, 31, 249, 221, 123, 225, 112, 199, 247, + ]; + + pub(crate) async fn respond_with(server: &MockServer, response: impl Respond + 'static) { Mock::given(method("GET")) .and(path_regex(r"/\d+.chk")) .respond_with(response) @@ -171,16 +188,53 @@ mod tests { .await; } - fn status(code: StatusCode) -> ResponseTemplate { + pub(crate) fn status(code: StatusCode) -> ResponseTemplate { ResponseTemplate::new(code.as_u16()) } + pub(crate) fn test_checkpoint_data(cp: u64) -> Vec { + let mut rng = StdRng::from_seed(RNG_SEED); + let (keys, committee) = make_committee_key(&mut rng); + let contents = CheckpointContents::new_with_digests_only_for_tests(vec![]); + let summary = CheckpointSummary::new( + &ProtocolConfig::get_for_max_version_UNSAFE(), + 0, + cp, + 0, + &contents, + None, + GasCostSummary::default(), + None, + 0, + Vec::new(), + ); + + let sign_infos: Vec<_> = keys + .iter() + .map(|k| { + let name = k.public().into(); + SignedCheckpointSummary::sign(committee.epoch, &summary, k, name) + }) + .collect(); + + let checkpoint_data = CheckpointData { + checkpoint_summary: CertifiedCheckpointSummary::new(summary, sign_infos, &committee) + .unwrap(), + checkpoint_contents: contents, + transactions: vec![], + }; + + Blob::encode(&checkpoint_data, BlobEncoding::Bcs) + .unwrap() + .to_bytes() + } + fn test_client(uri: String) -> IngestionClient { IngestionClient::new(Url::parse(&uri).unwrap(), Arc::new(test_metrics())).unwrap() } #[tokio::test] - async fn not_found() { + 
async fn fail_on_not_found() { let server = MockServer::start().await; respond_with(&server, status(StatusCode::NOT_FOUND)).await; @@ -191,7 +245,7 @@ mod tests { } #[tokio::test] - async fn client_error() { + async fn retry_on_client_error() { let server = MockServer::start().await; respond_with(&server, status(StatusCode::IM_A_TEAPOT)).await; @@ -205,7 +259,7 @@ mod tests { } #[tokio::test] - async fn transient_server_error() { + async fn retry_on_transient_server_error() { let server = MockServer::start().await; let times: Mutex = Mutex::new(0); diff --git a/crates/sui-indexer-alt/src/ingestion/mod.rs b/crates/sui-indexer-alt/src/ingestion/mod.rs index 88afaf9e0e2b5..b7b74258c514b 100644 --- a/crates/sui-indexer-alt/src/ingestion/mod.rs +++ b/crates/sui-indexer-alt/src/ingestion/mod.rs @@ -172,14 +172,269 @@ impl IngestionService { .await { Ok(()) => {} + Err(Break::Cancelled) => { info!("Shutdown received, stopping ingestion service"); } Err(Break::Err(checkpoint, e)) => { error!(checkpoint, "Ingestion service failed: {}", e); + cancel.cancel(); } } })) } } + +#[cfg(test)] +mod tests { + use std::sync::Mutex; + + use reqwest::StatusCode; + use wiremock::{MockServer, Request}; + + use crate::ingestion::client::tests::{respond_with, status, test_checkpoint_data}; + use crate::metrics::tests::test_metrics; + + use super::*; + + async fn test_ingestion( + uri: String, + buffer_size: usize, + concurrency: usize, + cancel: CancellationToken, + ) -> IngestionService { + IngestionService::new( + IngestionConfig { + remote_store_url: Url::parse(&uri).unwrap(), + start_checkpoint: 0, + buffer_size, + concurrency, + retry_interval: Duration::from_millis(200), + }, + test_metrics(), + cancel, + ) + .unwrap() + } + + async fn test_subscriber( + stop_after: usize, + mut rx: mpsc::Receiver>, + cancel: CancellationToken, + ) -> JoinHandle> { + spawn_monitored_task!(async move { + let mut seqs = vec![]; + for _ in 0..stop_after { + tokio::select! { + _ = cancel.cancelled() => break, + Some(checkpoint) = rx.recv() => { + seqs.push(checkpoint.checkpoint_summary.sequence_number); + } + } + } + + rx.close(); + seqs + }) + } + + /// If the ingestion service has no subscribers, it will fail fast (before fetching any + /// checkpoints). + #[tokio::test] + async fn fail_on_no_subscribers() { + telemetry_subscribers::init_for_testing(); + + // The mock server will repeatedly return 404, so if the service does try to fetch a + // checkpoint, it will be stuck repeatedly retrying. + let server = MockServer::start().await; + respond_with(&server, status(StatusCode::NOT_FOUND)).await; + + let cancel = CancellationToken::new(); + let ingestion_service = test_ingestion(server.uri(), 1, 1, cancel.clone()).await; + + let err = ingestion_service.run().await.unwrap_err(); + assert!(matches!(err, Error::NoSubscribers)); + } + + /// The subscriber has no effective limit, and the mock server will always return checkpoint + /// information, but the ingestion service can still be stopped using the cancellation token. 
+ #[tokio::test] + async fn shutdown_on_cancel() { + telemetry_subscribers::init_for_testing(); + + let server = MockServer::start().await; + respond_with( + &server, + status(StatusCode::OK).set_body_bytes(test_checkpoint_data(42)), + ) + .await; + + let cancel = CancellationToken::new(); + let mut ingestion_service = test_ingestion(server.uri(), 1, 1, cancel.clone()).await; + + let rx = ingestion_service.subscribe(); + let subscriber_handle = test_subscriber(usize::MAX, rx, cancel.clone()).await; + let ingestion_handle = ingestion_service.run().await.unwrap(); + + cancel.cancel(); + subscriber_handle.await.unwrap(); + ingestion_handle.await.unwrap(); + } + + /// The subscriber will stop after receiving a single checkpoint, and this will trigger the + /// ingestion service to stop as well, even if there are more checkpoints to fetch. + #[tokio::test] + async fn shutdown_on_subscriber_drop() { + telemetry_subscribers::init_for_testing(); + + let server = MockServer::start().await; + respond_with( + &server, + status(StatusCode::OK).set_body_bytes(test_checkpoint_data(42)), + ) + .await; + + let cancel = CancellationToken::new(); + let mut ingestion_service = test_ingestion(server.uri(), 1, 1, cancel.clone()).await; + + let rx = ingestion_service.subscribe(); + let subscriber_handle = test_subscriber(1, rx, cancel.clone()).await; + let ingestion_handle = ingestion_service.run().await.unwrap(); + + cancel.cancelled().await; + subscriber_handle.await.unwrap(); + ingestion_handle.await.unwrap(); + } + + /// If fetching the checkpoint throws an unexpected error, the whole pipeline will be shut + /// down. + #[tokio::test] + async fn shutdown_on_unexpected_error() { + telemetry_subscribers::init_for_testing(); + + let server = MockServer::start().await; + respond_with(&server, status(StatusCode::IM_A_TEAPOT)).await; + + let cancel = CancellationToken::new(); + let mut ingestion_service = test_ingestion(server.uri(), 1, 1, cancel.clone()).await; + + let rx = ingestion_service.subscribe(); + let subscriber_handle = test_subscriber(usize::MAX, rx, cancel.clone()).await; + let ingestion_handle = ingestion_service.run().await.unwrap(); + + cancel.cancelled().await; + subscriber_handle.await.unwrap(); + ingestion_handle.await.unwrap(); + } + + /// The service will retry fetching a checkpoint that does not exist, in this test, the 4th + /// checkpoint will return 404 a couple of times, before eventually succeeding. + #[tokio::test] + async fn retry_on_not_found() { + telemetry_subscribers::init_for_testing(); + + let server = MockServer::start().await; + let times: Mutex = Mutex::new(0); + respond_with(&server, move |_: &Request| { + let mut times = times.lock().unwrap(); + *times += 1; + match *times { + 1..4 => status(StatusCode::OK).set_body_bytes(test_checkpoint_data(*times)), + 4..6 => status(StatusCode::NOT_FOUND), + _ => status(StatusCode::OK).set_body_bytes(test_checkpoint_data(*times)), + } + }) + .await; + + let cancel = CancellationToken::new(); + let mut ingestion_service = test_ingestion(server.uri(), 1, 1, cancel.clone()).await; + + let rx = ingestion_service.subscribe(); + let subscriber_handle = test_subscriber(5, rx, cancel.clone()).await; + let ingestion_handle = ingestion_service.run().await.unwrap(); + + cancel.cancelled().await; + let seqs = subscriber_handle.await.unwrap(); + ingestion_handle.await.unwrap(); + + assert_eq!(seqs, vec![1, 2, 3, 6, 7]); + } + + /// Similar to the previous test, but now it's a transient error that causes the retry. 
+ #[tokio::test] + async fn retry_on_transient_error() { + telemetry_subscribers::init_for_testing(); + + let server = MockServer::start().await; + let times: Mutex = Mutex::new(0); + respond_with(&server, move |_: &Request| { + let mut times = times.lock().unwrap(); + *times += 1; + match *times { + 1..4 => status(StatusCode::OK).set_body_bytes(test_checkpoint_data(*times)), + 4..6 => status(StatusCode::REQUEST_TIMEOUT), + _ => status(StatusCode::OK).set_body_bytes(test_checkpoint_data(*times)), + } + }) + .await; + + let cancel = CancellationToken::new(); + let mut ingestion_service = test_ingestion(server.uri(), 1, 1, cancel.clone()).await; + + let rx = ingestion_service.subscribe(); + let subscriber_handle = test_subscriber(5, rx, cancel.clone()).await; + let ingestion_handle = ingestion_service.run().await.unwrap(); + + cancel.cancelled().await; + let seqs = subscriber_handle.await.unwrap(); + ingestion_handle.await.unwrap(); + + assert_eq!(seqs, vec![1, 2, 3, 6, 7]); + } + + /// One subscriber is going to stop processing checkpoints, so even though the service can keep + /// fetching checkpoints, it will stop short because of the slow receiver. Other subscribers + /// can keep processing checkpoints that were buffered for the slow one. + #[tokio::test] + async fn back_pressure_and_buffering() { + telemetry_subscribers::init_for_testing(); + + let server = MockServer::start().await; + let times: Mutex = Mutex::new(0); + respond_with(&server, move |_: &Request| { + let mut times = times.lock().unwrap(); + *times += 1; + status(StatusCode::OK).set_body_bytes(test_checkpoint_data(*times)) + }) + .await; + + let cancel = CancellationToken::new(); + let mut ingestion_service = + test_ingestion(server.uri(), /* buffer */ 3, 1, cancel.clone()).await; + + // This subscriber will take its sweet time processing checkpoints. + let mut laggard = ingestion_service.subscribe(); + async fn unblock(laggard: &mut mpsc::Receiver>) -> u64 { + let checkpoint = laggard.recv().await.unwrap(); + checkpoint.checkpoint_summary.sequence_number + } + + let rx = ingestion_service.subscribe(); + let subscriber_handle = test_subscriber(5, rx, cancel.clone()).await; + let ingestion_handle = ingestion_service.run().await.unwrap(); + + // At this point, the service will have been able to pass 3 checkpoints to the non-lagging + // subscriber, while the laggard's buffer fills up. Now the laggard will pull two + // checkpoints, which will allow the rest of the pipeline to progress enough for the live + // subscriber to receive its quota. + assert_eq!(unblock(&mut laggard).await, 1); + assert_eq!(unblock(&mut laggard).await, 2); + + cancel.cancelled().await; + let seqs = subscriber_handle.await.unwrap(); + ingestion_handle.await.unwrap(); + + assert_eq!(seqs, vec![1, 2, 3, 4, 5]); + } +} From d71278d9acb54675fc4cb1f43e2d48326bd46997 Mon Sep 17 00:00:00 2001 From: Ashok Menon Date: Mon, 14 Oct 2024 11:01:08 +0100 Subject: [PATCH 10/33] indexer-alt: treat bcs deserialization errors as transient ## Description Assume that whatever store checkpoint data is being fetched from is correct. If it was not possible to get a full response back from the store, or that response could not be successfully deserialized, back-off and try again. 
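For illustration, the retry policy in isolation (a free-standing sketch using the same `backoff` crate, not the indexer's code; `fetch` and `decode` stand in for the HTTP request and BCS decoding):

```
use std::time::Duration;

use backoff::ExponentialBackoff;

/// Failing to read the body and failing to decode it are both classified as
/// transient, so the loop keeps asking the store until it serves bytes that
/// decode cleanly; only an error marked permanent escapes immediately.
async fn fetch_until_valid(
    mut fetch: impl FnMut() -> Result<Vec<u8>, String>,
    decode: impl Fn(&[u8]) -> Result<u64, String>,
) -> Result<u64, String> {
    let policy = ExponentialBackoff {
        max_interval: Duration::from_secs(60),
        max_elapsed_time: None, // never gives up of its own accord
        ..Default::default()
    };

    backoff::future::retry(policy, || {
        let attempt = fetch()
            .map_err(backoff::Error::transient)
            .and_then(|bytes| decode(&bytes).map_err(backoff::Error::transient));
        async move { attempt }
    })
    .await
}
```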
## Test plan New unit test: ``` sui$ cargo nextest run -p sui-indexer-alt -- retry_on_deserialization_error ``` --- .../sui-indexer-alt/src/ingestion/client.rs | 60 ++++++++++++++----- 1 file changed, 45 insertions(+), 15 deletions(-) diff --git a/crates/sui-indexer-alt/src/ingestion/client.rs b/crates/sui-indexer-alt/src/ingestion/client.rs index b6f224f6b986b..75763e80865eb 100644 --- a/crates/sui-indexer-alt/src/ingestion/client.rs +++ b/crates/sui-indexer-alt/src/ingestion/client.rs @@ -55,7 +55,23 @@ impl IngestionClient { use backoff::Error as BE; match response.status() { - code if code.is_success() => Ok(response), + code if code.is_success() => { + // Failure to extract all the bytes from the payload, or to deserialize the + // checkpoint from them is considered a transient error -- the store being + // fetched from needs to be corrected, and ingestion will keep retrying it + // until it is. + let bytes = response + .bytes() + .await + .map_err(|e| BE::transient(Error::ReqwestError(e)))?; + + self.metrics.total_ingested_bytes.inc_by(bytes.len() as u64); + let data: CheckpointData = Blob::from_bytes(&bytes).map_err(|e| { + BE::transient(Error::DeserializationError(checkpoint, e)) + })?; + + Ok(data) + } // Treat 404s as a special case so we can match on this error type. code @ StatusCode::NOT_FOUND => { @@ -102,25 +118,15 @@ impl IngestionClient { }; let guard = self.metrics.ingested_checkpoint_latency.start_timer(); - - let bytes = backoff::future::retry(backoff, request) - .await? - .bytes() - .await?; - - let data: CheckpointData = - Blob::from_bytes(&bytes).map_err(|e| Error::DeserializationError(checkpoint, e))?; - + let data = backoff::future::retry(backoff, request).await?; let elapsed = guard.stop_and_record(); + debug!( checkpoint, "Fetched checkpoint in {:.03}ms", elapsed * 1000.0 ); - self.metrics.total_ingested_checkpoints.inc(); - self.metrics.total_ingested_bytes.inc_by(bytes.len() as u64); - self.metrics .total_ingested_transactions .inc_by(data.transactions.len() as u64); @@ -245,7 +251,7 @@ pub(crate) mod tests { } #[tokio::test] - async fn retry_on_client_error() { + async fn fail_on_client_error() { let server = MockServer::start().await; respond_with(&server, status(StatusCode::IM_A_TEAPOT)).await; @@ -258,10 +264,12 @@ pub(crate) mod tests { )); } + /// Assume that certain errors will recover by themselves, and keep retrying with an + /// exponential back-off. These errors include: 5xx (server) errors, 408 (timeout), and 429 + /// (rate limiting). #[tokio::test] async fn retry_on_transient_server_error() { let server = MockServer::start().await; - let times: Mutex = Mutex::new(0); respond_with(&server, move |_: &Request| { let mut times = times.lock().unwrap(); @@ -283,4 +291,26 @@ pub(crate) mod tests { Error::HttpError(42, StatusCode::IM_A_TEAPOT) )); } + + /// Treat deserialization failure as another kind of transient error -- all checkpoint data + /// that is fetched should be valid (deserializable as a `CheckpointData`). 
+ #[tokio::test] + async fn retry_on_deserialization_error() { + let server = MockServer::start().await; + let times: Mutex = Mutex::new(0); + respond_with(&server, move |_: &Request| { + let mut times = times.lock().unwrap(); + *times += 1; + if *times < 3 { + status(StatusCode::OK).set_body_bytes(vec![]) + } else { + status(StatusCode::OK).set_body_bytes(test_checkpoint_data(42)) + } + }) + .await; + + let client = test_client(server.uri()); + let checkpoint = client.fetch(42).await.unwrap(); + assert_eq!(42, checkpoint.checkpoint_summary.sequence_number) + } } From e457e4ced2ea00733c5f5628864730834cdfd90e Mon Sep 17 00:00:00 2001 From: Ashok Menon Date: Mon, 14 Oct 2024 11:31:08 +0100 Subject: [PATCH 11/33] indexer-alt: tag retries with reason ## Description Add a label to the transient retries counter to mark the reason why we're retrying, in case there are trends in particular retries. --- .../sui-indexer-alt/src/ingestion/client.rs | 22 ++++++++-------- crates/sui-indexer-alt/src/metrics.rs | 25 +++++++++++++------ 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/crates/sui-indexer-alt/src/ingestion/client.rs b/crates/sui-indexer-alt/src/ingestion/client.rs index 75763e80865eb..181061dc31efe 100644 --- a/crates/sui-indexer-alt/src/ingestion/client.rs +++ b/crates/sui-indexer-alt/src/ingestion/client.rs @@ -7,7 +7,7 @@ use backoff::ExponentialBackoff; use reqwest::{Client, StatusCode}; use sui_storage::blob::Blob; use sui_types::full_checkpoint_content::CheckpointData; -use tracing::debug; +use tracing::{debug, error}; use url::Url; use crate::ingestion::error::{Error, Result}; @@ -60,13 +60,14 @@ impl IngestionClient { // checkpoint from them is considered a transient error -- the store being // fetched from needs to be corrected, and ingestion will keep retrying it // until it is. - let bytes = response - .bytes() - .await - .map_err(|e| BE::transient(Error::ReqwestError(e)))?; + let bytes = response.bytes().await.map_err(|e| { + self.metrics.inc_retry(checkpoint, "bytes"); + BE::transient(Error::ReqwestError(e)) + })?; self.metrics.total_ingested_bytes.inc_by(bytes.len() as u64); let data: CheckpointData = Blob::from_bytes(&bytes).map_err(|e| { + self.metrics.inc_retry(checkpoint, "deserialization"); BE::transient(Error::DeserializationError(checkpoint, e)) })?; @@ -81,29 +82,26 @@ impl IngestionClient { // Timeouts are a client error but they are usually transient. code @ StatusCode::REQUEST_TIMEOUT => { - debug!(checkpoint, %code, "Transient error, retrying..."); - self.metrics.total_ingested_transient_retries.inc(); + self.metrics.inc_retry(checkpoint, "timeout"); Err(BE::transient(Error::HttpError(checkpoint, code))) } // Rate limiting is also a client error, but the backoff will eventually widen the // interval appropriately. code @ StatusCode::TOO_MANY_REQUESTS => { - debug!(checkpoint, %code, "Transient error, retrying..."); - self.metrics.total_ingested_transient_retries.inc(); + self.metrics.inc_retry(checkpoint, "too_many_requests"); Err(BE::transient(Error::HttpError(checkpoint, code))) } // Assume that if the server is facing difficulties, it will recover eventually. code if code.is_server_error() => { - debug!(checkpoint, %code, "Transient error, retrying..."); - self.metrics.total_ingested_transient_retries.inc(); + self.metrics.inc_retry(checkpoint, "server_error"); Err(BE::transient(Error::HttpError(checkpoint, code))) } // For everything else, assume it's a permanent error and don't retry. 
code => { - debug!(checkpoint, %code, "Permanent error, giving up!"); + error!(checkpoint, %code, "Permanent error, giving up!"); Err(BE::permanent(Error::HttpError(checkpoint, code))) } } diff --git a/crates/sui-indexer-alt/src/metrics.rs b/crates/sui-indexer-alt/src/metrics.rs index 4d89444b32126..106c115f05662 100644 --- a/crates/sui-indexer-alt/src/metrics.rs +++ b/crates/sui-indexer-alt/src/metrics.rs @@ -7,12 +7,12 @@ use anyhow::Result; use axum::{extract::Extension, routing::get, Router}; use mysten_metrics::RegistryService; use prometheus::{ - register_histogram_with_registry, register_int_counter_with_registry, Histogram, IntCounter, - Registry, + register_histogram_with_registry, register_int_counter_vec_with_registry, + register_int_counter_with_registry, Histogram, IntCounter, IntCounterVec, Registry, }; use tokio::{net::TcpListener, task::JoinHandle}; use tokio_util::sync::CancellationToken; -use tracing::info; +use tracing::{info, warn}; /// Histogram buckets for the distribution of checkpoint fetching latencies. const INGESTION_LATENCY_SEC_BUCKETS: &[f64] = &[ @@ -35,7 +35,7 @@ pub struct IndexerMetrics { pub total_ingested_inputs: IntCounter, pub total_ingested_outputs: IntCounter, pub total_ingested_bytes: IntCounter, - pub total_ingested_transient_retries: IntCounter, + pub total_ingested_transient_retries: IntCounterVec, pub total_ingested_not_found_retries: IntCounter, // Distribution of times taken to fetch data from the remote store, including time taken on @@ -123,15 +123,18 @@ impl IndexerMetrics { registry, ) .unwrap(), - total_ingested_transient_retries: register_int_counter_with_registry!( + total_ingested_transient_retries: register_int_counter_vec_with_registry!( "indexer_total_ingested_retries", - "Total number of retries due to transient errors while fetching data from the remote store", + "Total number of retries due to transient errors while fetching data from the \ + remote store", + &["reason"], registry, ) .unwrap(), total_ingested_not_found_retries: register_int_counter_with_registry!( "indexer_total_ingested_not_found_retries", - "Total number of retries due to the not found errors while fetching data from the remote store", + "Total number of retries due to the not found errors while fetching data from the \ + remote store", registry, ) .unwrap(), @@ -144,6 +147,14 @@ impl IndexerMetrics { .unwrap(), } } + + /// Register that we're retrying a checkpoint fetch due to a transient error. + pub(crate) fn inc_retry(&self, checkpoint: u64, reason: &str) { + warn!(checkpoint, reason, "Transient error, retrying..."); + self.total_ingested_transient_retries + .with_label_values(&[reason]) + .inc(); + } } #[cfg(test)] From 145b7e4283af36fc19bc19d52d983faf31703b38 Mon Sep 17 00:00:00 2001 From: Ashok Menon Date: Mon, 14 Oct 2024 11:44:15 +0100 Subject: [PATCH 12/33] refactor(indexer-alt): helper to log retries ## Description Add a helper function to log retrying a fetched checkpoint. Updates a counter, and traces the error and reason. 
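In outline, the helper has this shape (a simplified, free-standing sketch; the real method lives on `IndexerMetrics` and is shown in the diff below):

```
use tracing::warn;

/// Log the retry with its reason and hand back a transient `backoff::Error`,
/// so call sites collapse into a single expression such as
/// `Err(self.metrics.inc_retry(checkpoint, "timeout", err))`.
fn inc_retry<E: std::fmt::Display>(checkpoint: u64, reason: &str, error: E) -> backoff::Error<E> {
    warn!(checkpoint, reason, "Retrying due to error: {error}");
    // The real method additionally increments the `reason`-labelled
    // `total_ingested_transient_retries` counter.
    backoff::Error::transient(error)
}
```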
--- .../sui-indexer-alt/src/ingestion/client.rs | 38 +++++++++++-------- crates/sui-indexer-alt/src/metrics.rs | 17 +++++++-- 2 files changed, 36 insertions(+), 19 deletions(-) diff --git a/crates/sui-indexer-alt/src/ingestion/client.rs b/crates/sui-indexer-alt/src/ingestion/client.rs index 181061dc31efe..db064fc8ad602 100644 --- a/crates/sui-indexer-alt/src/ingestion/client.rs +++ b/crates/sui-indexer-alt/src/ingestion/client.rs @@ -61,14 +61,17 @@ impl IngestionClient { // fetched from needs to be corrected, and ingestion will keep retrying it // until it is. let bytes = response.bytes().await.map_err(|e| { - self.metrics.inc_retry(checkpoint, "bytes"); - BE::transient(Error::ReqwestError(e)) + self.metrics + .inc_retry(checkpoint, "bytes", Error::ReqwestError(e)) })?; self.metrics.total_ingested_bytes.inc_by(bytes.len() as u64); let data: CheckpointData = Blob::from_bytes(&bytes).map_err(|e| { - self.metrics.inc_retry(checkpoint, "deserialization"); - BE::transient(Error::DeserializationError(checkpoint, e)) + self.metrics.inc_retry( + checkpoint, + "deserialization", + Error::DeserializationError(checkpoint, e), + ) })?; Ok(data) @@ -81,23 +84,26 @@ impl IngestionClient { } // Timeouts are a client error but they are usually transient. - code @ StatusCode::REQUEST_TIMEOUT => { - self.metrics.inc_retry(checkpoint, "timeout"); - Err(BE::transient(Error::HttpError(checkpoint, code))) - } + code @ StatusCode::REQUEST_TIMEOUT => Err(self.metrics.inc_retry( + checkpoint, + "timeout", + Error::HttpError(checkpoint, code), + )), // Rate limiting is also a client error, but the backoff will eventually widen the // interval appropriately. - code @ StatusCode::TOO_MANY_REQUESTS => { - self.metrics.inc_retry(checkpoint, "too_many_requests"); - Err(BE::transient(Error::HttpError(checkpoint, code))) - } + code @ StatusCode::TOO_MANY_REQUESTS => Err(self.metrics.inc_retry( + checkpoint, + "too_many_requests", + Error::HttpError(checkpoint, code), + )), // Assume that if the server is facing difficulties, it will recover eventually. - code if code.is_server_error() => { - self.metrics.inc_retry(checkpoint, "server_error"); - Err(BE::transient(Error::HttpError(checkpoint, code))) - } + code if code.is_server_error() => Err(self.metrics.inc_retry( + checkpoint, + "server_error", + Error::HttpError(checkpoint, code), + )), // For everything else, assume it's a permanent error and don't retry. code => { diff --git a/crates/sui-indexer-alt/src/metrics.rs b/crates/sui-indexer-alt/src/metrics.rs index 106c115f05662..399bc38dae39c 100644 --- a/crates/sui-indexer-alt/src/metrics.rs +++ b/crates/sui-indexer-alt/src/metrics.rs @@ -14,6 +14,8 @@ use tokio::{net::TcpListener, task::JoinHandle}; use tokio_util::sync::CancellationToken; use tracing::{info, warn}; +use crate::ingestion::error::Error; + /// Histogram buckets for the distribution of checkpoint fetching latencies. const INGESTION_LATENCY_SEC_BUCKETS: &[f64] = &[ 0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0, 20.0, 50.0, 100.0, @@ -148,12 +150,21 @@ impl IndexerMetrics { } } - /// Register that we're retrying a checkpoint fetch due to a transient error. - pub(crate) fn inc_retry(&self, checkpoint: u64, reason: &str) { - warn!(checkpoint, reason, "Transient error, retrying..."); + /// Register that we're retrying a checkpoint fetch due to a transient error, logging the + /// reason and error. 
+ pub(crate) fn inc_retry( + &self, + checkpoint: u64, + reason: &str, + error: Error, + ) -> backoff::Error { + warn!(checkpoint, reason, "Retrying due to error: {error}"); + self.total_ingested_transient_retries .with_label_values(&[reason]) .inc(); + + backoff::Error::transient(error) } } From 8a1f145cd1dce986d9b7834042cab0614acea566 Mon Sep 17 00:00:00 2001 From: Ashok Menon Date: Mon, 14 Oct 2024 12:33:44 +0100 Subject: [PATCH 13/33] indexer-alt: retry on request errors ## Description Recover from failures to send a request to the remote store in the first place (Initially, I mistakenly thought that `send` would only fail if the request was malformed, and that we would otherwise get a Response of some kind, but this is not true). ## Test plan New unit test: ``` sui$ cargo nextest run -p sui-indexer-alt -- retry_on_reuest_error ``` --- .../sui-indexer-alt/src/ingestion/client.rs | 46 ++++++++++++++++--- 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/crates/sui-indexer-alt/src/ingestion/client.rs b/crates/sui-indexer-alt/src/ingestion/client.rs index db064fc8ad602..0428345836b50 100644 --- a/crates/sui-indexer-alt/src/ingestion/client.rs +++ b/crates/sui-indexer-alt/src/ingestion/client.rs @@ -46,12 +46,10 @@ impl IngestionClient { let request = move || { let url = url.clone(); async move { - let response = self - .client - .get(url) - .send() - .await - .expect("Unexpected error building request"); + let response = self.client.get(url).send().await.map_err(|e| { + self.metrics + .inc_retry(checkpoint, "request", Error::ReqwestError(e)) + })?; use backoff::Error as BE; match response.status() { @@ -268,6 +266,42 @@ pub(crate) mod tests { )); } + /// Assume that failures to send the request to the remote store are due to temporary + /// connectivity issues, and retry them. + #[tokio::test] + async fn retry_on_request_error() { + let server = MockServer::start().await; + + let times: Mutex = Mutex::new(0); + respond_with(&server, move |r: &Request| { + let mut times = times.lock().unwrap(); + *times += 1; + match (*times, r.url.path()) { + // The first request will trigger a redirect to 0.chk no matter what the original + // request was for -- triggering a request error. + (1, _) => status(StatusCode::MOVED_PERMANENTLY).append_header("Location", "/0.chk"), + + // Set-up checkpoint 0 as an infinite redirect loop. + (_, "/0.chk") => { + status(StatusCode::MOVED_PERMANENTLY).append_header("Location", r.url.as_str()) + } + + // Subsequently, requests will fail with a permanent error, this is what we expect + // to see. + _ => status(StatusCode::IM_A_TEAPOT), + } + }) + .await; + + let client = test_client(server.uri()); + let error = client.fetch(42).await.unwrap_err(); + + assert!( + matches!(error, Error::HttpError(42, StatusCode::IM_A_TEAPOT),), + "{error}" + ); + } + /// Assume that certain errors will recover by themselves, and keep retrying with an /// exponential back-off. These errors include: 5xx (server) errors, 408 (timeout), and 429 /// (rate limiting). From 36d6efe01e57b05a461c7ab75ae4e2270a369e62 Mon Sep 17 00:00:00 2001 From: Ashok Menon Date: Mon, 14 Oct 2024 13:05:44 +0100 Subject: [PATCH 14/33] indexer-alt: cancel fetch retry ## Description If ingestion is stuck talking to a server that is repeatedly producing transient errors, it can get stuck. This change gives us a way to tell the ingestion service to give up from outside. 
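The mechanism in isolation (a condensed sketch, not the indexer's exact code; `attempt` stands in for the HTTP fetch):

```
use std::time::Duration;

use backoff::ExponentialBackoff;
use tokio_util::sync::CancellationToken;

/// Every attempt first checks the token; a cancelled token is surfaced as a
/// *permanent* error, which is the only thing that stops a retry loop that is
/// configured to never give up on its own.
async fn fetch_or_give_up(
    cancel: &CancellationToken,
    mut attempt: impl FnMut() -> Result<u64, String>,
) -> Result<u64, String> {
    let policy = ExponentialBackoff {
        max_interval: Duration::from_secs(60),
        max_elapsed_time: None,
        ..Default::default()
    };

    backoff::future::retry(policy, || {
        let outcome = if cancel.is_cancelled() {
            Err(backoff::Error::permanent("shutdown requested".to_owned()))
        } else {
            attempt().map_err(backoff::Error::transient)
        };
        async move { outcome }
    })
    .await
}
```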
## Test plan New unit tests: ``` sui$ cargo nextest run -p sui-indexer-alt -- fail_on_cancel ``` --- .../sui-indexer-alt/src/ingestion/client.rs | 78 ++++++++++++++++--- crates/sui-indexer-alt/src/ingestion/error.rs | 3 + crates/sui-indexer-alt/src/ingestion/mod.rs | 23 ++---- 3 files changed, 80 insertions(+), 24 deletions(-) diff --git a/crates/sui-indexer-alt/src/ingestion/client.rs b/crates/sui-indexer-alt/src/ingestion/client.rs index 0428345836b50..bfe8c3c9286c5 100644 --- a/crates/sui-indexer-alt/src/ingestion/client.rs +++ b/crates/sui-indexer-alt/src/ingestion/client.rs @@ -7,6 +7,7 @@ use backoff::ExponentialBackoff; use reqwest::{Client, StatusCode}; use sui_storage::blob::Blob; use sui_types::full_checkpoint_content::CheckpointData; +use tokio_util::sync::CancellationToken; use tracing::{debug, error}; use url::Url; @@ -34,9 +35,23 @@ impl IngestionClient { } /// Fetch a checkpoint from the remote store. Repeatedly retries transient errors with an - /// exponential backoff (up to [MAX_RETRY_INTERVAL]), but will immediately return - /// non-transient errors, which include all client errors, except timeouts and rate limiting. - pub(crate) async fn fetch(&self, checkpoint: u64) -> Result> { + /// exponential backoff (up to [MAX_RETRY_INTERVAL]), but will immediately return on: + /// + /// - non-transient errors, which include all client errors, except timeouts and rate limiting. + /// - cancellation of the supplied `cancel` token. + /// + /// Transient errors include: + /// + /// - failures to issue a request, (network errors, redirect issues, etc) + /// - request timeouts, + /// - rate limiting, + /// - server errors (5xx), + /// - issues getting a full response and deserializing it as [CheckpointData]. + pub(crate) async fn fetch( + &self, + checkpoint: u64, + cancel: &CancellationToken, + ) -> Result> { // SAFETY: The path being joined is statically known to be valid. let url = self .url @@ -46,12 +61,16 @@ impl IngestionClient { let request = move || { let url = url.clone(); async move { + use backoff::Error as BE; + if cancel.is_cancelled() { + return Err(BE::permanent(Error::Cancelled)); + } + let response = self.client.get(url).send().await.map_err(|e| { self.metrics .inc_retry(checkpoint, "request", Error::ReqwestError(e)) })?; - use backoff::Error as BE; match response.status() { code if code.is_success() => { // Failure to extract all the bytes from the payload, or to deserialize the @@ -247,7 +266,10 @@ pub(crate) mod tests { respond_with(&server, status(StatusCode::NOT_FOUND)).await; let client = test_client(server.uri()); - let error = client.fetch(42).await.unwrap_err(); + let error = client + .fetch(42, &CancellationToken::new()) + .await + .unwrap_err(); assert!(matches!(error, Error::NotFound(42))); } @@ -258,7 +280,10 @@ pub(crate) mod tests { respond_with(&server, status(StatusCode::IM_A_TEAPOT)).await; let client = test_client(server.uri()); - let error = client.fetch(42).await.unwrap_err(); + let error = client + .fetch(42, &CancellationToken::new()) + .await + .unwrap_err(); assert!(matches!( error, @@ -266,6 +291,35 @@ pub(crate) mod tests { )); } + /// Even if the server is repeatedly returning transient errors, it is possible to cancel the + /// fetch request via its cancellation token. 
+ #[tokio::test] + async fn fail_on_cancel() { + let cancel = CancellationToken::new(); + let server = MockServer::start().await; + + // This mock server repeatedly returns internal server errors, but will also send a + // cancellation with the second request (this is a bit of a contrived test set-up). + let times: Mutex = Mutex::new(0); + let server_cancel = cancel.clone(); + respond_with(&server, move |_: &Request| { + let mut times = times.lock().unwrap(); + *times += 1; + + if *times > 2 { + server_cancel.cancel(); + } + + status(StatusCode::INTERNAL_SERVER_ERROR) + }) + .await; + + let client = test_client(server.uri()); + let error = client.fetch(42, &cancel.clone()).await.unwrap_err(); + + assert!(matches!(error, Error::Cancelled)); + } + /// Assume that failures to send the request to the remote store are due to temporary /// connectivity issues, and retry them. #[tokio::test] @@ -294,7 +348,10 @@ pub(crate) mod tests { .await; let client = test_client(server.uri()); - let error = client.fetch(42).await.unwrap_err(); + let error = client + .fetch(42, &CancellationToken::new()) + .await + .unwrap_err(); assert!( matches!(error, Error::HttpError(42, StatusCode::IM_A_TEAPOT),), @@ -322,7 +379,10 @@ pub(crate) mod tests { .await; let client = test_client(server.uri()); - let error = client.fetch(42).await.unwrap_err(); + let error = client + .fetch(42, &CancellationToken::new()) + .await + .unwrap_err(); assert!(matches!( error, @@ -348,7 +408,7 @@ pub(crate) mod tests { .await; let client = test_client(server.uri()); - let checkpoint = client.fetch(42).await.unwrap(); + let checkpoint = client.fetch(42, &CancellationToken::new()).await.unwrap(); assert_eq!(42, checkpoint.checkpoint_summary.sequence_number) } } diff --git a/crates/sui-indexer-alt/src/ingestion/error.rs b/crates/sui-indexer-alt/src/ingestion/error.rs index eecce9c41c876..78fff94d46d8f 100644 --- a/crates/sui-indexer-alt/src/ingestion/error.rs +++ b/crates/sui-indexer-alt/src/ingestion/error.rs @@ -21,4 +21,7 @@ pub enum Error { #[error("No subscribers for ingestion service")] NoSubscribers, + + #[error("Shutdown signal received, stopping ingestion service")] + Cancelled, } diff --git a/crates/sui-indexer-alt/src/ingestion/mod.rs b/crates/sui-indexer-alt/src/ingestion/mod.rs index b7b74258c514b..7a426c0b9f442 100644 --- a/crates/sui-indexer-alt/src/ingestion/mod.rs +++ b/crates/sui-indexer-alt/src/ingestion/mod.rs @@ -103,13 +103,6 @@ impl IngestionService { cancel, } = self; - /// Individual iterations of `try_for_each_concurrent` communicate with the supervisor by - /// returning an `Err` with a `Break` variant. 
- enum Break { - Cancelled, - Err(u64, Error), - } - if subscribers.is_empty() { return Err(Error::NoSubscribers); } @@ -142,16 +135,16 @@ impl IngestionService { async move { use backoff::Error as BE; if cancel.is_cancelled() { - return Err(BE::permanent(Break::Cancelled)); + return Err(BE::permanent(Error::Cancelled)); } - client.fetch(cp).await.map_err(|e| match e { + client.fetch(cp, &cancel).await.map_err(|e| match e { Error::NotFound(checkpoint) => { debug!(checkpoint, "Checkpoint not found, retrying..."); metrics.total_ingested_not_found_retries.inc(); - BE::transient(Break::Err(cp, e)) + BE::transient(e) } - e => BE::permanent(Break::Err(cp, e)), + e => BE::permanent(e), }) } }; @@ -163,7 +156,7 @@ impl IngestionService { if try_join_all(futures).await.is_err() { info!("Subscription dropped, signalling shutdown"); supervisor_cancel.cancel(); - Err(Break::Cancelled) + Err(Error::Cancelled) } else { Ok(()) } @@ -173,12 +166,12 @@ impl IngestionService { { Ok(()) => {} - Err(Break::Cancelled) => { + Err(Error::Cancelled) => { info!("Shutdown received, stopping ingestion service"); } - Err(Break::Err(checkpoint, e)) => { - error!(checkpoint, "Ingestion service failed: {}", e); + Err(e) => { + error!("Ingestion service failed: {}", e); cancel.cancel(); } } From c31deb28972e6f26ab66459f9935502b7e5aca06 Mon Sep 17 00:00:00 2001 From: Ashok Menon Date: Mon, 14 Oct 2024 19:27:57 +0100 Subject: [PATCH 15/33] indexer-alt: integration diesel ## Description Initial set-up integrating a DB connection pool into the indexer. ## Test plan Run the indexer and check the stats from the DB pool are propogated to prometheus. --- .github/workflows/rust.yml | 3 + Cargo.lock | 3 + crates/sui-indexer-alt/Cargo.toml | 3 + crates/sui-indexer-alt/diesel.toml | 5 + crates/sui-indexer-alt/generate_schema.sh | 77 +++++++ .../down.sql | 6 + .../up.sql | 36 ++++ .../2024-10-14-123213_checkpoints/down.sql | 1 + .../2024-10-14-123213_checkpoints/up.sql | 6 + crates/sui-indexer-alt/schema.patch | 7 + crates/sui-indexer-alt/src/args.rs | 5 +- crates/sui-indexer-alt/src/db.rs | 67 ++++++ crates/sui-indexer-alt/src/lib.rs | 2 + crates/sui-indexer-alt/src/main.rs | 9 +- crates/sui-indexer-alt/src/metrics.rs | 190 +++++++++++++++++- crates/sui-indexer-alt/src/schema.rs | 11 + 16 files changed, 427 insertions(+), 4 deletions(-) create mode 100644 crates/sui-indexer-alt/diesel.toml create mode 100755 crates/sui-indexer-alt/generate_schema.sh create mode 100644 crates/sui-indexer-alt/migrations/00000000000000_diesel_initial_setup/down.sql create mode 100644 crates/sui-indexer-alt/migrations/00000000000000_diesel_initial_setup/up.sql create mode 100644 crates/sui-indexer-alt/migrations/2024-10-14-123213_checkpoints/down.sql create mode 100644 crates/sui-indexer-alt/migrations/2024-10-14-123213_checkpoints/up.sql create mode 100644 crates/sui-indexer-alt/schema.patch create mode 100644 crates/sui-indexer-alt/src/db.rs create mode 100644 crates/sui-indexer-alt/src/schema.rs diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index b43aa15317af7..1226ae33d7710 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -189,6 +189,9 @@ jobs: - name: Indexer schema run: | ./scripts/generate_indexer_schema.sh + - name: Indexer Alt schema + run: | + ./crates/sui-indexer-alt/generate_schema.sh # Ensure there are no uncommitted changes in the repo after running tests - run: scripts/changed-files.sh shell: bash diff --git a/Cargo.lock b/Cargo.lock index b2dae713e01de..474d6df586ef4 100644 --- 
a/Cargo.lock +++ b/Cargo.lock @@ -13883,8 +13883,11 @@ dependencies = [ "anyhow", "axum 0.7.5", "backoff", + "bb8", "bcs", "clap", + "diesel", + "diesel-async", "futures", "mysten-metrics", "prometheus", diff --git a/crates/sui-indexer-alt/Cargo.toml b/crates/sui-indexer-alt/Cargo.toml index b84e06cd066db..b702bec352619 100644 --- a/crates/sui-indexer-alt/Cargo.toml +++ b/crates/sui-indexer-alt/Cargo.toml @@ -14,8 +14,11 @@ path = "src/main.rs" anyhow.workspace = true axum.workspace = true backoff.workspace = true +bb8 = "0.8.5" bcs.workspace = true clap.workspace = true +diesel = { workspace = true, features = ["chrono"] } +diesel-async = { workspace = true, features = ["bb8", "postgres", "async-connection-wrapper"] } futures.workspace = true prometheus.workspace = true reqwest.workspace = true diff --git a/crates/sui-indexer-alt/diesel.toml b/crates/sui-indexer-alt/diesel.toml new file mode 100644 index 0000000000000..0679916a26a25 --- /dev/null +++ b/crates/sui-indexer-alt/diesel.toml @@ -0,0 +1,5 @@ +[print_schema] +file = "src/schema.rs" + +[migrations_directory] +dir = "migrations" diff --git a/crates/sui-indexer-alt/generate_schema.sh b/crates/sui-indexer-alt/generate_schema.sh new file mode 100755 index 0000000000000..c65b178011d33 --- /dev/null +++ b/crates/sui-indexer-alt/generate_schema.sh @@ -0,0 +1,77 @@ +#!/bin/bash +# Copyright (c) Mysten Labs, Inc. +# SPDX-License-Identifier: Apache-2.0 +# +# Update sui-indexer's generated src/schema.rs based on the schema after +# running all its migrations on a clean database. Expects the first argument to +# be a port to run the temporary database on (defaults to 5433). + +set -x +set -e + +if ! command -v git &> /dev/null; then + echo "Please install git: e.g. brew install git" >&2 + exit 1 +fi + +for PG in psql initdb postgres pg_isready pg_ctl; do + if ! command -v $PG &> /dev/null; then + echo "Could not find $PG. Please install postgres: e.g. brew install postgresql@15" >&2 + exit 1 + fi +done + +if ! command -v diesel &> /dev/null; then + echo "Please install diesel: e.g. cargo install diesel_cli --features postgres" >&2 + exit 1 +fi + +REPO=$(git rev-parse --show-toplevel) + +# Create a temporary directory to store the ephemeral DB. +TMP=$(mktemp -d) + +# Set-up a trap to clean everything up on EXIT (stop DB, delete temp directory) +function cleanup { + pg_ctl stop -D "$TMP" -mfast + set +x + echo "Postgres STDOUT:" + cat "$TMP/db.stdout" + echo "Postgres STDERR:" + cat "$TMP/db.stderr" + set -x + rm -rf "$TMP" +} +trap cleanup EXIT + +# Create a new database in the temporary directory +initdb -D "$TMP" --user postgres + +# Run the DB in the background, on the port provided and capture its output +PORT=${1:-5433} +postgres -D "$TMP" -p "$PORT" -c unix_socket_directories= \ + > "$TMP/db.stdout" \ + 2> "$TMP/db.stderr" & + +# Wait for postgres to report as ready +RETRIES=0 +while ! pg_isready -p "$PORT" --host "localhost" --username "postgres"; do + if [ $RETRIES -gt 5 ]; then + echo "Postgres failed to start" >&2 + exit 1 + fi + sleep 1 + RETRIES=$((RETRIES + 1)) +done + +# Run all migrations on the new database +diesel migration run \ + --database-url "postgres://postgres:postgrespw@localhost:$PORT" \ + --migration-dir "$REPO/crates/sui-indexer-alt/migrations" + +# Generate the schema.rs file, excluding partition tables and including the +# copyright notice. 
+diesel print-schema \ + --database-url "postgres://postgres:postgrespw@localhost:$PORT" \ + --patch-file "$REPO/crates/sui-indexer-alt/schema.patch" \ + > "$REPO/crates/sui-indexer-alt/src/schema.rs" diff --git a/crates/sui-indexer-alt/migrations/00000000000000_diesel_initial_setup/down.sql b/crates/sui-indexer-alt/migrations/00000000000000_diesel_initial_setup/down.sql new file mode 100644 index 0000000000000..a9f526091194b --- /dev/null +++ b/crates/sui-indexer-alt/migrations/00000000000000_diesel_initial_setup/down.sql @@ -0,0 +1,6 @@ +-- This file was automatically created by Diesel to setup helper functions +-- and other internal bookkeeping. This file is safe to edit, any future +-- changes will be added to existing projects as new migrations. + +DROP FUNCTION IF EXISTS diesel_manage_updated_at(_tbl regclass); +DROP FUNCTION IF EXISTS diesel_set_updated_at(); diff --git a/crates/sui-indexer-alt/migrations/00000000000000_diesel_initial_setup/up.sql b/crates/sui-indexer-alt/migrations/00000000000000_diesel_initial_setup/up.sql new file mode 100644 index 0000000000000..d68895b1a7b7d --- /dev/null +++ b/crates/sui-indexer-alt/migrations/00000000000000_diesel_initial_setup/up.sql @@ -0,0 +1,36 @@ +-- This file was automatically created by Diesel to setup helper functions +-- and other internal bookkeeping. This file is safe to edit, any future +-- changes will be added to existing projects as new migrations. + + + + +-- Sets up a trigger for the given table to automatically set a column called +-- `updated_at` whenever the row is modified (unless `updated_at` was included +-- in the modified columns) +-- +-- # Example +-- +-- ```sql +-- CREATE TABLE users (id SERIAL PRIMARY KEY, updated_at TIMESTAMP NOT NULL DEFAULT NOW()); +-- +-- SELECT diesel_manage_updated_at('users'); +-- ``` +CREATE OR REPLACE FUNCTION diesel_manage_updated_at(_tbl regclass) RETURNS VOID AS $$ +BEGIN + EXECUTE format('CREATE TRIGGER set_updated_at BEFORE UPDATE ON %s + FOR EACH ROW EXECUTE PROCEDURE diesel_set_updated_at()', _tbl); +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION diesel_set_updated_at() RETURNS trigger AS $$ +BEGIN + IF ( + NEW IS DISTINCT FROM OLD AND + NEW.updated_at IS NOT DISTINCT FROM OLD.updated_at + ) THEN + NEW.updated_at := current_timestamp; + END IF; + RETURN NEW; +END; +$$ LANGUAGE plpgsql; diff --git a/crates/sui-indexer-alt/migrations/2024-10-14-123213_checkpoints/down.sql b/crates/sui-indexer-alt/migrations/2024-10-14-123213_checkpoints/down.sql new file mode 100644 index 0000000000000..837ea1e8355cc --- /dev/null +++ b/crates/sui-indexer-alt/migrations/2024-10-14-123213_checkpoints/down.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS kv_checkpoints; diff --git a/crates/sui-indexer-alt/migrations/2024-10-14-123213_checkpoints/up.sql b/crates/sui-indexer-alt/migrations/2024-10-14-123213_checkpoints/up.sql new file mode 100644 index 0000000000000..f177da0844341 --- /dev/null +++ b/crates/sui-indexer-alt/migrations/2024-10-14-123213_checkpoints/up.sql @@ -0,0 +1,6 @@ +CREATE TABLE IF NOT EXISTS kv_checkpoints +( + sequence_number BIGINT PRIMARY KEY, + certified_checkpoint BYTEA NOT NULL, + checkpoint_contents BYTEA NOT NULL +); diff --git a/crates/sui-indexer-alt/schema.patch b/crates/sui-indexer-alt/schema.patch new file mode 100644 index 0000000000000..ee683461f7a7d --- /dev/null +++ b/crates/sui-indexer-alt/schema.patch @@ -0,0 +1,7 @@ +diff --git a/crates/sui-indexer-alt/src/schema.rs b/crates/sui-indexer-alt/src/schema.rs +--- a/crates/sui-indexer-alt/src/schema.rs ++++ 
b/crates/sui-indexer-alt/src/schema.rs +@@ -1 +1,3 @@ ++// Copyright (c) Mysten Labs, Inc. ++// SPDX-License-Identifier: Apache-2.0 + // @generated automatically by Diesel CLI. diff --git a/crates/sui-indexer-alt/src/args.rs b/crates/sui-indexer-alt/src/args.rs index e288af455d67c..7766a67b2f2e1 100644 --- a/crates/sui-indexer-alt/src/args.rs +++ b/crates/sui-indexer-alt/src/args.rs @@ -3,13 +3,16 @@ use std::net::SocketAddr; -use crate::ingestion::IngestionConfig; +use crate::{db::DbConfig, ingestion::IngestionConfig}; #[derive(clap::Parser, Debug, Clone)] pub struct Args { #[command(flatten)] pub ingestion: IngestionConfig, + #[command(flatten)] + pub db: DbConfig, + /// Address to serve Prometheus Metrics from. #[arg(long, default_value = "0.0.0.0:9184")] pub metrics_address: SocketAddr, diff --git a/crates/sui-indexer-alt/src/db.rs b/crates/sui-indexer-alt/src/db.rs new file mode 100644 index 0000000000000..8dcfe5660dd27 --- /dev/null +++ b/crates/sui-indexer-alt/src/db.rs @@ -0,0 +1,67 @@ +// Copyright (c) Mysten Labs, Inc. +// SPDX-License-Identifier: Apache-2.0 + +use std::time::Duration; + +use diesel_async::{ + pooled_connection::{ + bb8::{Pool, PooledConnection, RunError}, + AsyncDieselConnectionManager, PoolError, + }, + AsyncPgConnection, +}; +use url::Url; + +#[derive(Clone)] +pub struct Db { + pool: Pool, +} + +#[derive(clap::Args, Debug, Clone)] +pub struct DbConfig { + /// The URL of the database to connect to. + #[arg(long)] + database_url: Url, + + /// Number of connections to keep in the pool. + #[arg(long, default_value_t = 100)] + connection_pool_size: u32, + + /// Time spent waiting for a connection from the pool to become available. + #[arg( + long, + default_value = "60", + value_name = "SECONDS", + value_parser = |s: &str| s.parse().map(Duration::from_secs) + )] + connection_timeout: Duration, +} + +type Connection<'p> = PooledConnection<'p, AsyncPgConnection>; + +impl Db { + /// Construct a new DB connection pool. Instances of [Db] can be cloned to share access to the + /// same pool. + pub async fn new(config: DbConfig) -> Result { + let manager = AsyncDieselConnectionManager::new(config.database_url.as_str()); + + let pool = Pool::builder() + .max_size(config.connection_pool_size) + .connection_timeout(config.connection_timeout) + .build(manager) + .await?; + + Ok(Self { pool }) + } + + /// Retrieves a connection from the pool. Can fail with a timeout if a connection cannot be + /// established before the [DbConfig::connection_timeout] has elapsed. 
+ pub(crate) async fn connect(&self) -> Result, RunError> { + self.pool.get().await + } + + /// Statistics about the connection pool + pub(crate) fn state(&self) -> bb8::State { + self.pool.state() + } +} diff --git a/crates/sui-indexer-alt/src/lib.rs b/crates/sui-indexer-alt/src/lib.rs index ed434c4f77687..457130213d42a 100644 --- a/crates/sui-indexer-alt/src/lib.rs +++ b/crates/sui-indexer-alt/src/lib.rs @@ -2,5 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 pub mod args; +pub mod db; pub mod ingestion; pub mod metrics; +mod schema; diff --git a/crates/sui-indexer-alt/src/main.rs b/crates/sui-indexer-alt/src/main.rs index a9b8e585fc5b3..1322970ee6ec9 100644 --- a/crates/sui-indexer-alt/src/main.rs +++ b/crates/sui-indexer-alt/src/main.rs @@ -4,7 +4,7 @@ use anyhow::{Context, Result}; use clap::Parser; use mysten_metrics::spawn_monitored_task; -use sui_indexer_alt::{args::Args, ingestion::IngestionService, metrics::MetricsService}; +use sui_indexer_alt::{args::Args, db::Db, ingestion::IngestionService, metrics::MetricsService}; use tokio::{signal, task::JoinHandle}; use tokio_util::sync::CancellationToken; use tracing::info; @@ -20,7 +20,12 @@ async fn main() -> Result<()> { let cancel = CancellationToken::new(); - let (metrics, metrics_service) = MetricsService::new(args.metrics_address, cancel.clone())?; + let db = Db::new(args.db) + .await + .context("Failed to connect to database")?; + + let (metrics, metrics_service) = + MetricsService::new(args.metrics_address, db.clone(), cancel.clone())?; let mut ingestion_service = IngestionService::new(args.ingestion, metrics.clone(), cancel.clone())?; diff --git a/crates/sui-indexer-alt/src/metrics.rs b/crates/sui-indexer-alt/src/metrics.rs index 399bc38dae39c..8da21b850ec66 100644 --- a/crates/sui-indexer-alt/src/metrics.rs +++ b/crates/sui-indexer-alt/src/metrics.rs @@ -7,6 +7,8 @@ use anyhow::Result; use axum::{extract::Extension, routing::get, Router}; use mysten_metrics::RegistryService; use prometheus::{ + core::{Collector, Desc}, + proto::{Counter, Gauge, LabelPair, Metric, MetricFamily, MetricType, Summary}, register_histogram_with_registry, register_int_counter_vec_with_registry, register_int_counter_with_registry, Histogram, IntCounter, IntCounterVec, Registry, }; @@ -14,7 +16,7 @@ use tokio::{net::TcpListener, task::JoinHandle}; use tokio_util::sync::CancellationToken; use tracing::{info, warn}; -use crate::ingestion::error::Error; +use crate::{db::Db, ingestion::error::Error}; /// Histogram buckets for the distribution of checkpoint fetching latencies. const INGESTION_LATENCY_SEC_BUCKETS: &[f64] = &[ @@ -45,17 +47,26 @@ pub struct IndexerMetrics { pub ingested_checkpoint_latency: Histogram, } +/// Collects information about the database connection pool. +struct DbConnectionStatsCollector { + db: Db, + desc: Vec<(MetricType, Desc)>, +} + impl MetricsService { /// Create a new metrics service, exposing Mysten-wide metrics, and Indexer-specific metrics. /// Returns the Indexer-specific metrics and the service itself (which must be run with /// [Self::run]). 
pub fn new( addr: SocketAddr, + db: Db, cancel: CancellationToken, ) -> Result<(IndexerMetrics, MetricsService)> { let registry = Registry::new_custom(Some("indexer_alt".to_string()), None)?; + let metrics = IndexerMetrics::new(®istry); mysten_metrics::init_metrics(®istry); + registry.register(Box::new(DbConnectionStatsCollector::new(db)))?; let service = Self { addr, @@ -168,6 +179,183 @@ impl IndexerMetrics { } } +impl DbConnectionStatsCollector { + fn new(db: Db) -> Self { + let desc = vec![ + ( + MetricType::GAUGE, + desc( + "db_connections", + "Number of connections currently being managed by the pool", + ), + ), + ( + MetricType::GAUGE, + desc( + "db_idle_connections", + "Number of idle connections in the pool", + ), + ), + ( + MetricType::COUNTER, + desc("db_connect_direct", "Connections that did not have to wait"), + ), + ( + MetricType::SUMMARY, + desc("db_connect_waited", "Connections that had to wait"), + ), + ( + MetricType::COUNTER, + desc( + "db_connect_timed_out", + "Connections that timed out waiting for a connection", + ), + ), + ( + MetricType::COUNTER, + desc( + "db_connections_created", + "Connections that have been created in the pool", + ), + ), + ( + MetricType::COUNTER, + desc_with_labels( + "db_connections_closed", + "Total connections that were closed", + &["reason"], + ), + ), + ]; + + Self { db, desc } + } +} + +impl Collector for DbConnectionStatsCollector { + fn desc(&self) -> Vec<&Desc> { + self.desc.iter().map(|d| &d.1).collect() + } + + fn collect(&self) -> Vec { + let state = self.db.state(); + let stats = state.statistics; + + vec![ + gauge(&self.desc[0].1, state.connections as f64), + gauge(&self.desc[1].1, state.idle_connections as f64), + counter(&self.desc[2].1, stats.get_direct as f64), + summary( + &self.desc[3].1, + stats.get_wait_time.as_millis() as f64, + stats.get_waited + stats.get_timed_out, + ), + counter(&self.desc[4].1, stats.get_timed_out as f64), + counter(&self.desc[5].1, stats.connections_created as f64), + counter_with_labels( + &self.desc[6].1, + &[ + ("reason", "broken", stats.connections_closed_broken as f64), + ("reason", "invalid", stats.connections_closed_invalid as f64), + ( + "reason", + "max_lifetime", + stats.connections_closed_max_lifetime as f64, + ), + ( + "reason", + "idle_timeout", + stats.connections_closed_idle_timeout as f64, + ), + ], + ), + ] + } +} + +fn desc(name: &str, help: &str) -> Desc { + desc_with_labels(name, help, &[]) +} + +fn desc_with_labels(name: &str, help: &str, labels: &[&str]) -> Desc { + Desc::new( + name.to_string(), + help.to_string(), + labels.iter().map(|s| s.to_string()).collect(), + Default::default(), + ) + .expect("Bad metric description") +} + +fn gauge(desc: &Desc, value: f64) -> MetricFamily { + let mut g = Gauge::default(); + let mut m = Metric::default(); + let mut mf = MetricFamily::new(); + + g.set_value(value); + m.set_gauge(g); + + mf.mut_metric().push(m); + mf.set_name(desc.fq_name.clone()); + mf.set_help(desc.help.clone()); + mf.set_field_type(MetricType::COUNTER); + mf +} + +fn counter(desc: &Desc, value: f64) -> MetricFamily { + let mut c = Counter::default(); + let mut m = Metric::default(); + let mut mf = MetricFamily::new(); + + c.set_value(value); + m.set_counter(c); + + mf.mut_metric().push(m); + mf.set_name(desc.fq_name.clone()); + mf.set_help(desc.help.clone()); + mf.set_field_type(MetricType::GAUGE); + mf +} + +fn counter_with_labels(desc: &Desc, values: &[(&str, &str, f64)]) -> MetricFamily { + let mut mf = MetricFamily::new(); + + for (name, label, value) in values { 
+ let mut c = Counter::default(); + let mut l = LabelPair::default(); + let mut m = Metric::default(); + + c.set_value(*value); + l.set_name(name.to_string()); + l.set_value(label.to_string()); + + m.set_counter(c); + m.mut_label().push(l); + mf.mut_metric().push(m); + } + + mf.set_name(desc.fq_name.clone()); + mf.set_help(desc.help.clone()); + mf.set_field_type(MetricType::COUNTER); + mf +} + +fn summary(desc: &Desc, sum: f64, count: u64) -> MetricFamily { + let mut s = Summary::default(); + let mut m = Metric::default(); + let mut mf = MetricFamily::new(); + + s.set_sample_sum(sum); + s.set_sample_count(count); + m.set_summary(s); + + mf.mut_metric().push(m); + mf.set_name(desc.fq_name.clone()); + mf.set_help(desc.help.clone()); + mf.set_field_type(MetricType::SUMMARY); + mf +} + #[cfg(test)] pub(crate) mod tests { use prometheus::Registry; diff --git a/crates/sui-indexer-alt/src/schema.rs b/crates/sui-indexer-alt/src/schema.rs new file mode 100644 index 0000000000000..ffaf45f9bc6eb --- /dev/null +++ b/crates/sui-indexer-alt/src/schema.rs @@ -0,0 +1,11 @@ +// Copyright (c) Mysten Labs, Inc. +// SPDX-License-Identifier: Apache-2.0 +// @generated automatically by Diesel CLI. + +diesel::table! { + kv_checkpoints (sequence_number) { + sequence_number -> Int8, + certified_checkpoint -> Bytea, + checkpoint_contents -> Bytea, + } +} From b9784dac6c525634c66d1ea7221f8446911199c6 Mon Sep 17 00:00:00 2001 From: Ashok Menon Date: Mon, 14 Oct 2024 20:35:00 +0100 Subject: [PATCH 16/33] indexer-alt: graceful shutdown on panic ## Description Handle the case where one part of the indexer panics and we need to cleanly shutdown. ## Test plan Introduced a panic in the dummy ingester and make sure everything winds down correctly. --- crates/sui-indexer-alt/src/lib.rs | 1 + crates/sui-indexer-alt/src/main.rs | 16 ++++++------- crates/sui-indexer-alt/src/task.rs | 37 ++++++++++++++++++++++++++++++ 3 files changed, 46 insertions(+), 8 deletions(-) create mode 100644 crates/sui-indexer-alt/src/task.rs diff --git a/crates/sui-indexer-alt/src/lib.rs b/crates/sui-indexer-alt/src/lib.rs index 457130213d42a..222563609c8cf 100644 --- a/crates/sui-indexer-alt/src/lib.rs +++ b/crates/sui-indexer-alt/src/lib.rs @@ -6,3 +6,4 @@ pub mod db; pub mod ingestion; pub mod metrics; mod schema; +pub mod task; diff --git a/crates/sui-indexer-alt/src/main.rs b/crates/sui-indexer-alt/src/main.rs index 1322970ee6ec9..49ccf642576f6 100644 --- a/crates/sui-indexer-alt/src/main.rs +++ b/crates/sui-indexer-alt/src/main.rs @@ -4,8 +4,11 @@ use anyhow::{Context, Result}; use clap::Parser; use mysten_metrics::spawn_monitored_task; -use sui_indexer_alt::{args::Args, db::Db, ingestion::IngestionService, metrics::MetricsService}; -use tokio::{signal, task::JoinHandle}; +use sui_indexer_alt::{ + args::Args, db::Db, ingestion::IngestionService, metrics::MetricsService, + task::graceful_shutdown, +}; +use tokio::task::JoinHandle; use tokio_util::sync::CancellationToken; use tracing::info; @@ -41,12 +44,9 @@ async fn main() -> Result<()> { .await .context("Failed to start ingestion service")?; - // Once we receive a Ctrl-C, notify all services to shutdown, and wait for them to finish. - signal::ctrl_c().await.unwrap(); - cancel.cancel(); - ingester_handle.await.unwrap(); - metrics_handle.await.unwrap(); - ingestion_handle.await.unwrap(); + // Once we receive a Ctrl-C or one of the services panics or is cancelled, notify all services + // to shutdown, and wait for them to finish. 
+ graceful_shutdown([ingester_handle, metrics_handle, ingestion_handle], cancel).await; Ok(()) } diff --git a/crates/sui-indexer-alt/src/task.rs b/crates/sui-indexer-alt/src/task.rs new file mode 100644 index 0000000000000..c4fd802d8afb1 --- /dev/null +++ b/crates/sui-indexer-alt/src/task.rs @@ -0,0 +1,37 @@ +// Copyright (c) Mysten Labs, Inc. +// SPDX-License-Identifier: Apache-2.0 + +use std::iter; + +use futures::future::{self, Either}; +use tokio::{signal, task::JoinHandle}; +use tokio_util::sync::CancellationToken; + +/// Manages cleanly exiting the process, either because one of its constituent services has stopped +/// or because an interrupt signal was sent to the process. +pub async fn graceful_shutdown( + services: impl IntoIterator>, + cancel: CancellationToken, +) { + let interrupt = async { + signal::ctrl_c() + .await + .expect("Failed to listen for Ctrl-C signal"); + + Ok(()) + }; + + tokio::pin!(interrupt); + let futures: Vec<_> = services + .into_iter() + .map(Either::Left) + .chain(iter::once(Either::Right(interrupt))) + .collect(); + + // Wait for the first service to finish, or for an interrupt signal. + let (_, _, rest) = future::select_all(futures).await; + cancel.cancel(); + + // Wait for the remaining services to finish. + let _ = future::join_all(rest).await; +} From b18512f4a0433b2f35ec60ca2cbf0372b11d4bd4 Mon Sep 17 00:00:00 2001 From: Ashok Menon Date: Tue, 15 Oct 2024 15:16:45 +0100 Subject: [PATCH 17/33] indexer-alt: indexing pipelines + checkpoint indexing ## Description Introduce a framework for writing indexing pipelines and use it to add a pipeline for checkpoints. ## Test plan Tested the pipeline locally. --- Cargo.lock | 2 + crates/sui-indexer-alt/Cargo.toml | 2 + crates/sui-indexer-alt/src/args.rs | 5 +- crates/sui-indexer-alt/src/db.rs | 2 +- .../src/handlers/kv_checkpoints.rs | 40 ++ crates/sui-indexer-alt/src/handlers/mod.rs | 424 ++++++++++++++++++ .../sui-indexer-alt/src/ingestion/client.rs | 6 +- crates/sui-indexer-alt/src/ingestion/mod.rs | 5 +- crates/sui-indexer-alt/src/lib.rs | 4 +- crates/sui-indexer-alt/src/main.rs | 46 +- crates/sui-indexer-alt/src/metrics.rs | 136 +++++- .../sui-indexer-alt/src/models/checkpoints.rs | 15 + crates/sui-indexer-alt/src/models/mod.rs | 4 + 13 files changed, 649 insertions(+), 42 deletions(-) create mode 100644 crates/sui-indexer-alt/src/handlers/kv_checkpoints.rs create mode 100644 crates/sui-indexer-alt/src/handlers/mod.rs create mode 100644 crates/sui-indexer-alt/src/models/checkpoints.rs create mode 100644 crates/sui-indexer-alt/src/models/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 474d6df586ef4..a80c6a43b4812 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13881,6 +13881,7 @@ name = "sui-indexer-alt" version = "1.37.0" dependencies = [ "anyhow", + "async-trait", "axum 0.7.5", "backoff", "bb8", @@ -13899,6 +13900,7 @@ dependencies = [ "telemetry-subscribers", "thiserror", "tokio", + "tokio-stream", "tokio-util 0.7.10", "tracing", "url", diff --git a/crates/sui-indexer-alt/Cargo.toml b/crates/sui-indexer-alt/Cargo.toml index b702bec352619..83305df9fc2cb 100644 --- a/crates/sui-indexer-alt/Cargo.toml +++ b/crates/sui-indexer-alt/Cargo.toml @@ -12,6 +12,7 @@ path = "src/main.rs" [dependencies] anyhow.workspace = true +async-trait.workspace = true axum.workspace = true backoff.workspace = true bb8 = "0.8.5" @@ -26,6 +27,7 @@ serde.workspace = true telemetry-subscribers.workspace = true thiserror.workspace = true tokio.workspace = true +tokio-stream.workspace = true tokio-util.workspace = true 
tracing.workspace = true url.workspace = true diff --git a/crates/sui-indexer-alt/src/args.rs b/crates/sui-indexer-alt/src/args.rs index 7766a67b2f2e1..adbf028c9b57c 100644 --- a/crates/sui-indexer-alt/src/args.rs +++ b/crates/sui-indexer-alt/src/args.rs @@ -3,7 +3,7 @@ use std::net::SocketAddr; -use crate::{db::DbConfig, ingestion::IngestionConfig}; +use crate::{db::DbConfig, handlers::CommitterConfig, ingestion::IngestionConfig}; #[derive(clap::Parser, Debug, Clone)] pub struct Args { @@ -13,6 +13,9 @@ pub struct Args { #[command(flatten)] pub db: DbConfig, + #[command(flatten)] + pub committer: CommitterConfig, + /// Address to serve Prometheus Metrics from. #[arg(long, default_value = "0.0.0.0:9184")] pub metrics_address: SocketAddr, diff --git a/crates/sui-indexer-alt/src/db.rs b/crates/sui-indexer-alt/src/db.rs index 8dcfe5660dd27..99fe94c9fb7fb 100644 --- a/crates/sui-indexer-alt/src/db.rs +++ b/crates/sui-indexer-alt/src/db.rs @@ -37,7 +37,7 @@ pub struct DbConfig { connection_timeout: Duration, } -type Connection<'p> = PooledConnection<'p, AsyncPgConnection>; +pub type Connection<'p> = PooledConnection<'p, AsyncPgConnection>; impl Db { /// Construct a new DB connection pool. Instances of [Db] can be cloned to share access to the diff --git a/crates/sui-indexer-alt/src/handlers/kv_checkpoints.rs b/crates/sui-indexer-alt/src/handlers/kv_checkpoints.rs new file mode 100644 index 0000000000000..b310a088ebd07 --- /dev/null +++ b/crates/sui-indexer-alt/src/handlers/kv_checkpoints.rs @@ -0,0 +1,40 @@ +// Copyright (c) Mysten Labs, Inc. +// SPDX-License-Identifier: Apache-2.0 + +use std::sync::Arc; + +use anyhow::{Context, Result}; +use diesel_async::RunQueryDsl; +use sui_types::full_checkpoint_content::CheckpointData; + +use crate::{db, models::checkpoints::StoredCheckpoint, schema::kv_checkpoints}; + +use super::Handler; + +pub struct KvCheckpoints; + +#[async_trait::async_trait] +impl Handler for KvCheckpoints { + const NAME: &'static str = "kv_checkpoints"; + + type Value = StoredCheckpoint; + + fn handle(checkpoint: &Arc) -> Result> { + let sequence_number = checkpoint.checkpoint_summary.sequence_number as i64; + Ok(vec![StoredCheckpoint { + sequence_number, + certified_checkpoint: bcs::to_bytes(&checkpoint.checkpoint_summary) + .with_context(|| format!("Serializing checkpoint {sequence_number} summary"))?, + checkpoint_contents: bcs::to_bytes(&checkpoint.checkpoint_contents) + .with_context(|| format!("Serializing checkpoint {sequence_number} contents"))?, + }]) + } + + async fn commit(values: &[Self::Value], conn: &mut db::Connection<'_>) -> Result { + Ok(diesel::insert_into(kv_checkpoints::table) + .values(values) + .on_conflict_do_nothing() + .execute(conn) + .await?) + } +} diff --git a/crates/sui-indexer-alt/src/handlers/mod.rs b/crates/sui-indexer-alt/src/handlers/mod.rs new file mode 100644 index 0000000000000..5bfc7bb0fd039 --- /dev/null +++ b/crates/sui-indexer-alt/src/handlers/mod.rs @@ -0,0 +1,424 @@ +// Copyright (c) Mysten Labs, Inc. 
+// SPDX-License-Identifier: Apache-2.0 + +use std::{collections::BTreeMap, sync::Arc, time::Duration}; + +use futures::TryStreamExt; +use mysten_metrics::spawn_monitored_task; +use sui_types::full_checkpoint_content::CheckpointData; +use tokio::{ + sync::mpsc, + task::JoinHandle, + time::{interval, MissedTickBehavior}, +}; +use tokio_stream::{wrappers::ReceiverStream, StreamExt}; +use tokio_util::sync::CancellationToken; +use tracing::{debug, error, info, warn}; + +use crate::{ + db::{self, Db}, + metrics::IndexerMetrics, +}; + +pub mod kv_checkpoints; + +/// Extra buffer added to the channel between the handler and the committer. There does not need to +/// be a huge capacity here because the committer is already buffering rows to insert internally. +const COMMITTER_BUFFER: usize = 5; + +/// The committer will wait at least this long between commits for any given pipeline. +const COOLDOWN_INTERVAL: Duration = Duration::from_millis(20); + +/// The committer will wait at least this long between attempts to commit a failed batch. +const RETRY_INTERVAL: Duration = Duration::from_millis(100); + +/// Handlers implement the logic for a given indexing pipeline: How to process checkpoint data into +/// rows for their table, and how to write those rows to the database. +/// +/// The handler is also responsible for tuning the various parameters of the pipeline (provided as +/// associated values). Reasonable defaults have been chosen to balance concurrency with memory +/// usage, but each handle may choose to override these defaults, e.g. +/// +/// - Handlers that produce many small rows may wish to increase their batch/chunk/max-pending +/// sizes). +/// - Handlers that do more work during processing may wish to increase their fanout so more of it +/// can be done concurrently, to preserve throughput. +#[async_trait::async_trait] +pub trait Handler { + /// Used to identify the pipeline in logs and metrics. + const NAME: &'static str; + + /// How much concurrency to use when processing checkpoint data. + const FANOUT: usize = 10; + + /// If at least this many rows are pending, the committer will commit them eagerly. + const BATCH_SIZE: usize = 50; + + /// If there are more than this many rows pending, the committer will only commit this many in + /// one operation. + const CHUNK_SIZE: usize = 200; + + /// If there are more than this many rows pending, the committer applies backpressure. + const MAX_PENDING_SIZE: usize = 1000; + + /// The type of value being inserted by the handler. + type Value: Send + Sync + 'static; + + /// The processing logic for turning a checkpoint into rows of the table. + fn handle(checkpoint: &Arc) -> anyhow::Result>; + + /// Take a chunk of values and commit them to the database, returning the number of rows + /// affected. + async fn commit(values: &[Self::Value], conn: &mut db::Connection<'_>) + -> anyhow::Result; +} + +#[derive(clap::Args, Debug, Clone)] +pub struct CommitterConfig { + /// Committer will check for pending data at least this often + #[arg( + long, + default_value = "500", + value_name = "MILLISECONDS", + value_parser = |s: &str| s.parse().map(Duration::from_millis), + )] + commit_interval: Duration, +} + +/// A batch of processed values associated with a single checkpoint. This is an internal type used +/// to communicate between the handler and the committer parts of the pipeline. +struct Indexed { + sequence_number: u64, + values: Vec, +} + +/// Start a new indexing pipeline served by the handler, `H`. 
Each pipeline consists of a handler +/// task which takes checkpoint data and breaks it down into rows, ready for insertion, and a +/// committer which writes those rows out to the database. +/// +/// Checkpoint data is fed into the pipeline through the `handler_rx` channel, and an internal +/// channel is created to communicate checkpoint-wise data to the committer. The pipeline can be +/// shutdown using its `cancel` token. +pub fn pipeline( + db: Db, + handler_rx: mpsc::Receiver>, + config: CommitterConfig, + metrics: Arc, + cancel: CancellationToken, +) -> (JoinHandle<()>, JoinHandle<()>) { + let (handler_tx, committer_rx) = mpsc::channel(H::FANOUT + COMMITTER_BUFFER); + + let handler = handler::(handler_rx, handler_tx, metrics.clone(), cancel.clone()); + let committer = committer::(db, committer_rx, config, metrics, cancel); + + (handler, committer) +} + +/// The handler task is responsible for taking checkpoint data and breaking it down into rows ready +/// to commit. It spins up a supervisor that waits on the `rx` channel for checkpoints, and +/// distributes them among `H::FANOUT` workers. +/// +/// Each worker processes a checkpoint into rows and sends them on to the committer using the `tx` +/// channel. +/// +/// The task will shutdown if the `cancel` token is cancelled, or if any of the workers encounters +/// an error -- there is no retry logic at this level. +fn handler( + rx: mpsc::Receiver>, + tx: mpsc::Sender>, + metrics: Arc, + cancel: CancellationToken, +) -> JoinHandle<()> { + /// Internal type used by workers to propagate errors or shutdown signals up to their + /// supervisor. + #[derive(thiserror::Error, Debug)] + enum Break { + #[error("Shutdown received")] + Cancel, + + #[error(transparent)] + Err(#[from] anyhow::Error), + } + + spawn_monitored_task!(async move { + match ReceiverStream::new(rx) + .map(Ok) + .try_for_each_concurrent(H::FANOUT, |checkpoint| { + let tx = tx.clone(); + let metrics = metrics.clone(); + let cancel = cancel.clone(); + async move { + if cancel.is_cancelled() { + return Err(Break::Cancel); + } + + metrics + .total_handler_checkpoints_received + .with_label_values(&[H::NAME]) + .inc(); + + let guard = metrics + .handler_checkpoint_latency + .with_label_values(&[H::NAME]) + .start_timer(); + + let values = H::handle(&checkpoint)?; + let elapsed = guard.stop_and_record(); + + debug!( + pipeline = H::NAME, + elapsed_ms = elapsed * 1000.0, + "Processed checkpoint", + ); + + metrics + .total_handler_checkpoints_processed + .with_label_values(&[H::NAME]) + .inc(); + + metrics + .total_handler_rows_created + .with_label_values(&[H::NAME]) + .inc_by(values.len() as u64); + + tx.send(Indexed { + sequence_number: checkpoint.checkpoint_summary.sequence_number, + values, + }) + .await + .map_err(|_| Break::Cancel)?; + + Ok(()) + } + }) + .await + { + Ok(()) | Err(Break::Cancel) => { + info!(pipeline = H::NAME, "Shutdown received, stopping handler"); + } + + Err(Break::Err(e)) => { + error!(pipeline = H::NAME, "Error from handler: {e}"); + cancel.cancel(); + } + }; + }) +} + +/// The commiter task is responsible for gathering rows to write to the database. It is a single +/// work loop that gathers checkpoint-wise row information, and periodically writes them out to the +/// database. +/// +/// The period between writes is controlled by the following factors: +/// +/// - Time since the last write (controlled by `config.commit_interval`). 
If there are rows pending +/// and this interval has elapsed since the last attempted commit, the committer will attempt +/// another write. +/// +/// - Time since last attempted write (controlled by `COOLDOWN_INTERVAL` and `RETRY_INTERVAL`). If +/// there was a recent successful write, the next write will wait at least `COOLDOWN_INTERVAL`, +/// and if there was a recent unsuccessful write, the next write will wait at least +/// `RETRY_INTERVAL`. This is to prevent one committer from hogging the database. +/// +/// - Number of pending rows. If this exceeds `H::BATCH_SIZE`, the committer will attempt to write +/// out at most `H::CHUNK_SIZE` worth of rows to the DB. +/// +/// If a write fails, the commiter will save the batch it meant to write and try to write them +/// again at the next opportunity, potentially adding more rows to the batch if more have arrived +/// in the interim. +/// +/// This task will shutdown if canceled via the `cancel` token, or if the channel it receives data +/// on has been closed by the handler for some reason. +fn committer( + db: Db, + mut rx: mpsc::Receiver>, + config: CommitterConfig, + metrics: Arc, + cancel: CancellationToken, +) -> JoinHandle<()> { + spawn_monitored_task!(async move { + // The `poll` interval controls the maximum time to wait between commits, regardless of the + // amount of data available. + let mut poll = interval(config.commit_interval); + let mut cool = interval(COOLDOWN_INTERVAL); + + // We don't care about keeping a regular cadence -- these intervals are used to guarantee + // things are spaced at out relative to each other. + poll.set_missed_tick_behavior(MissedTickBehavior::Delay); + cool.set_missed_tick_behavior(MissedTickBehavior::Delay); + + // Buffer to gather the next batch to write. This may be non-empty at the top of a tick of + // the commiter's loop if the previous attempt at a write failed. Attempt is incremented + // every time a batch write fails, and is reset when it succeeds. + let mut attempt = 0; + let mut batch = vec![]; + + // Data for checkpoints that haven't been written yet. Note that `pending_rows` includes + // rows in `batch`. + let mut pending: BTreeMap> = BTreeMap::new(); + let mut pending_rows = 0; + + // TODO: Track the watermark (keep track of out-of-order commits, and update the watermark + // table when we accumulate a run of contiguous commits starting from the current watermark). + + loop { + tokio::select! { + // Break ties in favour of operations that reduce the size of the buffer. + biased; + + _ = cancel.cancelled() => { + info!(pipline = H::NAME, "Shutdown received, stopping committer"); + break; + } + + // Time to write out another batch of rows, if there are any. 
+ _ = poll.tick(), if pending_rows > 0 => { + let Ok(mut conn) = db.connect().await else { + warn!(pipeline = H::NAME, "Failed to get connection for DB"); + cool.reset(); + continue; + }; + + let guard = metrics + .committer_gather_latency + .with_label_values(&[H::NAME]) + .start_timer(); + + while batch.len() < H::CHUNK_SIZE { + let Some(mut entry) = pending.first_entry() else { + break; + }; + + let values = entry.get_mut(); + if batch.len() + values.len() > H::CHUNK_SIZE { + let mut for_batch = values.split_off(H::CHUNK_SIZE - batch.len()); + std::mem::swap(values, &mut for_batch); + batch.extend(for_batch); + break; + } else { + batch.extend(entry.remove()); + } + } + + let elapsed = guard.stop_and_record(); + debug!( + pipeline = H::NAME, + elapsed_ms = elapsed * 1000.0, + rows = batch.len(), + pending = pending_rows, + "Gathered batch", + ); + + // TODO (optimization): Switch to COPY FROM, which should offer faster inserts? + // + // Note that COPY FROM cannot handle conflicts -- we need a way to gracefully + // fail over to `INSERT INTO ... ON CONFLICT DO NOTHING` if we encounter a + // conflict, and in that case, we are also subject to the same constraints on + // number of bind parameters as we had before. + // + // Postgres 17 supports an ON_ERROR option for COPY FROM which can ignore bad + // rows, but CloudSQL does not support Postgres 17 yet, and this directive only + // works if the FORMAT is set to TEXT or CSV, which are less efficient over the + // wire. + // + // The hope is that in the steady state, there will not be conflicts (they + // should only show up during backfills, or when we are handing off between + // indexers), so we can use a fallback mechanism for those cases but rely on + // COPY FROM most of the time. + // + // Note that the introduction of watermarks also complicates hand-over between + // two indexers writing to the same table: They cannot both update the + // watermark. One needs to subordinate to the other (or we need to set the + // watermark to the max of what is currently set and what was about to be + // written). + + metrics + .total_committer_batches_attempted + .with_label_values(&[H::NAME]) + .inc(); + + metrics + .committer_batch_size + .with_label_values(&[H::NAME]) + .observe(batch.len() as f64); + + let guard = metrics + .committer_commit_latency + .with_label_values(&[H::NAME]) + .start_timer(); + + match H::commit(&batch, &mut conn).await { + Ok(affected) => { + let elapsed = guard.stop_and_record(); + + metrics + .total_committer_rows_committed + .with_label_values(&[H::NAME]) + .inc_by(batch.len() as u64); + + metrics + .total_committer_rows_affected + .with_label_values(&[H::NAME]) + .inc_by(affected as u64); + + debug!( + pipeline = H::NAME, + elapsed_ms = elapsed * 1000.0, + attempt, + affected, + committed = batch.len(), + pending = pending_rows, + "Wrote batch", + ); + + pending_rows -= batch.len(); + attempt = 0; + + batch.clear(); + cool.reset(); + } + + Err(e) => { + let elapsed = guard.stop_and_record(); + + error!( + pipeline = H::NAME, + elapsed_ms = elapsed * 1000.0, + attempt, + committed = batch.len(), + pending = pending_rows, + "Error writing batch: {e}", + ); + + cool.reset_after(RETRY_INTERVAL); + attempt += 1; + } + } + + } + + // If there are enough pending rows, and we've expended the cooldown, reset the + // commit polling interval so that on the next iteration of the loop, we will write + // out another batch. 
+ _ = cool.tick(), if pending_rows > H::BATCH_SIZE => { + poll.reset_immediately(); + } + + indexed = rx.recv(), if pending_rows < H::MAX_PENDING_SIZE => { + if let Some(Indexed { sequence_number, values }) = indexed { + metrics + .total_committer_rows_received + .with_label_values(&[H::NAME]) + .inc_by(values.len() as u64); + + pending_rows += values.len(); + pending.insert(sequence_number, values); + } else { + info!(pipeline = H::NAME, "Handler closed channel, stopping committer"); + break; + } + } + } + } + }) +} diff --git a/crates/sui-indexer-alt/src/ingestion/client.rs b/crates/sui-indexer-alt/src/ingestion/client.rs index bfe8c3c9286c5..4edb2999d869a 100644 --- a/crates/sui-indexer-alt/src/ingestion/client.rs +++ b/crates/sui-indexer-alt/src/ingestion/client.rs @@ -144,10 +144,12 @@ impl IngestionClient { debug!( checkpoint, - "Fetched checkpoint in {:.03}ms", - elapsed * 1000.0 + elapsed_ms = elapsed * 1000.0, + "Fetched checkpoint" ); + self.metrics.total_ingested_checkpoints.inc(); + self.metrics .total_ingested_transactions .inc_by(data.transactions.len() as u64); diff --git a/crates/sui-indexer-alt/src/ingestion/mod.rs b/crates/sui-indexer-alt/src/ingestion/mod.rs index 7a426c0b9f442..b53cfa164d870 100644 --- a/crates/sui-indexer-alt/src/ingestion/mod.rs +++ b/crates/sui-indexer-alt/src/ingestion/mod.rs @@ -58,10 +58,9 @@ pub struct IngestionConfig { impl IngestionService { pub fn new( config: IngestionConfig, - metrics: IndexerMetrics, + metrics: Arc, cancel: CancellationToken, ) -> Result { - let metrics = Arc::new(metrics); Ok(Self { client: IngestionClient::new(config.remote_store_url.clone(), metrics.clone())?, subscribers: Vec::new(), @@ -205,7 +204,7 @@ mod tests { concurrency, retry_interval: Duration::from_millis(200), }, - test_metrics(), + Arc::new(test_metrics()), cancel, ) .unwrap() diff --git a/crates/sui-indexer-alt/src/lib.rs b/crates/sui-indexer-alt/src/lib.rs index 222563609c8cf..cabf14b6ce038 100644 --- a/crates/sui-indexer-alt/src/lib.rs +++ b/crates/sui-indexer-alt/src/lib.rs @@ -3,7 +3,9 @@ pub mod args; pub mod db; +pub mod handlers; pub mod ingestion; pub mod metrics; -mod schema; +pub mod models; +pub mod schema; pub mod task; diff --git a/crates/sui-indexer-alt/src/main.rs b/crates/sui-indexer-alt/src/main.rs index 49ccf642576f6..d83427b81349a 100644 --- a/crates/sui-indexer-alt/src/main.rs +++ b/crates/sui-indexer-alt/src/main.rs @@ -3,14 +3,15 @@ use anyhow::{Context, Result}; use clap::Parser; -use mysten_metrics::spawn_monitored_task; use sui_indexer_alt::{ - args::Args, db::Db, ingestion::IngestionService, metrics::MetricsService, + args::Args, + db::Db, + handlers::{kv_checkpoints::KvCheckpoints, pipeline}, + ingestion::IngestionService, + metrics::MetricsService, task::graceful_shutdown, }; -use tokio::task::JoinHandle; use tokio_util::sync::CancellationToken; -use tracing::info; #[tokio::main] async fn main() -> Result<()> { @@ -32,40 +33,31 @@ async fn main() -> Result<()> { let mut ingestion_service = IngestionService::new(args.ingestion, metrics.clone(), cancel.clone())?; - let metrics_handle = metrics_service + let h_metrics = metrics_service .run() .await .context("Failed to start metrics service")?; - let ingester_handle = digest_ingester(&mut ingestion_service, cancel.clone()); + let (h_cp_handler, h_cp_committer) = pipeline::( + db.clone(), + ingestion_service.subscribe(), + args.committer.clone(), + metrics.clone(), + cancel.clone(), + ); - let ingestion_handle = ingestion_service + let h_ingestion = ingestion_service .run() .await 
.context("Failed to start ingestion service")?; // Once we receive a Ctrl-C or one of the services panics or is cancelled, notify all services // to shutdown, and wait for them to finish. - graceful_shutdown([ingester_handle, metrics_handle, ingestion_handle], cancel).await; + graceful_shutdown( + [h_cp_handler, h_cp_committer, h_metrics, h_ingestion], + cancel, + ) + .await; Ok(()) } - -/// Test ingester which logs the digests of checkpoints it receives. -fn digest_ingester(ingestion: &mut IngestionService, cancel: CancellationToken) -> JoinHandle<()> { - let mut rx = ingestion.subscribe(); - spawn_monitored_task!(async move { - info!("Starting checkpoint digest ingester"); - loop { - tokio::select! { - _ = cancel.cancelled() => break, - Some(checkpoint) = rx.recv() => { - let cp = checkpoint.checkpoint_summary.sequence_number; - let digest = checkpoint.checkpoint_summary.content_digest; - info!("{cp}: {digest}"); - } - } - } - info!("Shutdown received, stopping digest ingester"); - }) -} diff --git a/crates/sui-indexer-alt/src/metrics.rs b/crates/sui-indexer-alt/src/metrics.rs index 8da21b850ec66..8d9e590854609 100644 --- a/crates/sui-indexer-alt/src/metrics.rs +++ b/crates/sui-indexer-alt/src/metrics.rs @@ -1,7 +1,7 @@ // Copyright (c) Mysten Labs, Inc. // SPDX-License-Identifier: Apache-2.0 -use std::net::SocketAddr; +use std::{net::SocketAddr, sync::Arc}; use anyhow::Result; use axum::{extract::Extension, routing::get, Router}; @@ -9,8 +9,9 @@ use mysten_metrics::RegistryService; use prometheus::{ core::{Collector, Desc}, proto::{Counter, Gauge, LabelPair, Metric, MetricFamily, MetricType, Summary}, - register_histogram_with_registry, register_int_counter_vec_with_registry, - register_int_counter_with_registry, Histogram, IntCounter, IntCounterVec, Registry, + register_histogram_vec_with_registry, register_histogram_with_registry, + register_int_counter_vec_with_registry, register_int_counter_with_registry, Histogram, + HistogramVec, IntCounter, IntCounterVec, Registry, }; use tokio::{net::TcpListener, task::JoinHandle}; use tokio_util::sync::CancellationToken; @@ -23,6 +24,23 @@ const INGESTION_LATENCY_SEC_BUCKETS: &[f64] = &[ 0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0, 20.0, 50.0, 100.0, ]; +/// Histogram buckets for the distribution of latencies for processing a checkpoint in the indexer +/// (without having to call out to other services). +const PROCESSING_LATENCY_SEC_BUCKETS: &[f64] = &[ + 0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0, 20.0, 50.0, 100.0, +]; + +/// Histogram buckets for the distribution of latencies for writing to the database. +const DB_UPDATE_LATENCY_SEC_BUCKETS: &[f64] = &[ + 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0, 20.0, 50.0, 100.0, 200.0, 500.0, 1000.0, + 2000.0, 5000.0, 10000.0, +]; + +/// Histogram buckets for the distribution of batch sizes (number of rows) written to the database. +const BATCH_SIZE_BUCKETS: &[f64] = &[ + 1.0, 2.0, 5.0, 10.0, 20.0, 50.0, 100.0, 200.0, 500.0, 1000.0, 2000.0, 5000.0, 10000.0, 20000.0, +]; + /// Service to expose prometheus metrics from the indexer. pub struct MetricsService { addr: SocketAddr, @@ -42,9 +60,25 @@ pub struct IndexerMetrics { pub total_ingested_transient_retries: IntCounterVec, pub total_ingested_not_found_retries: IntCounter, - // Distribution of times taken to fetch data from the remote store, including time taken on - // retries. 
pub ingested_checkpoint_latency: Histogram, + + // Statistics related to individual ingestion pipelines' handlers. + pub total_handler_checkpoints_received: IntCounterVec, + pub total_handler_checkpoints_processed: IntCounterVec, + pub total_handler_rows_created: IntCounterVec, + + pub handler_checkpoint_latency: HistogramVec, + + // Statistics related to individual ingestion pipelines' committers. + pub total_committer_batches_attempted: IntCounterVec, + pub total_committer_batches_succeeded: IntCounterVec, + pub total_committer_rows_received: IntCounterVec, + pub total_committer_rows_committed: IntCounterVec, + pub total_committer_rows_affected: IntCounterVec, + + pub committer_gather_latency: HistogramVec, + pub committer_commit_latency: HistogramVec, + pub committer_batch_size: HistogramVec, } /// Collects information about the database connection pool. @@ -61,7 +95,7 @@ impl MetricsService { addr: SocketAddr, db: Db, cancel: CancellationToken, - ) -> Result<(IndexerMetrics, MetricsService)> { + ) -> Result<(Arc, MetricsService)> { let registry = Registry::new_custom(Some("indexer_alt".to_string()), None)?; let metrics = IndexerMetrics::new(®istry); @@ -74,7 +108,7 @@ impl MetricsService { cancel, }; - Ok((metrics, service)) + Ok((Arc::new(metrics), service)) } /// Start the service. The service will run until the cancellation token is triggered. @@ -158,6 +192,94 @@ impl IndexerMetrics { registry, ) .unwrap(), + total_handler_checkpoints_received: register_int_counter_vec_with_registry!( + "indexer_total_handler_checkpoints_received", + "Total number of checkpoints received by this handler", + &["pipeline"], + registry, + ) + .unwrap(), + total_handler_checkpoints_processed: register_int_counter_vec_with_registry!( + "indexer_total_handler_checkpoints_processed", + "Total number of checkpoints processed (converted into rows) by this handler", + &["pipeline"], + registry, + ) + .unwrap(), + total_handler_rows_created: register_int_counter_vec_with_registry!( + "indexer_total_handler_rows_created", + "Total number of rows created by this handler", + &["pipeline"], + registry, + ) + .unwrap(), + handler_checkpoint_latency: register_histogram_vec_with_registry!( + "indexer_handler_checkpoint_latency", + "Time taken to process a checkpoint by this handler", + &["pipeline"], + PROCESSING_LATENCY_SEC_BUCKETS.to_vec(), + registry, + ) + .unwrap(), + total_committer_batches_attempted: register_int_counter_vec_with_registry!( + "indexer_total_committer_batches_attempted", + "Total number of batches writes attempted by this committer", + &["pipeline"], + registry, + ) + .unwrap(), + total_committer_batches_succeeded: register_int_counter_vec_with_registry!( + "indexer_total_committer_batches_succeeded", + "Total number of successful batches writes by this committer", + &["pipeline"], + registry, + ) + .unwrap(), + total_committer_rows_received: register_int_counter_vec_with_registry!( + "indexer_total_committer_rows_received", + "Total number of rows received by this committer", + &["pipeline"], + registry, + ) + .unwrap(), + total_committer_rows_committed: register_int_counter_vec_with_registry!( + "indexer_total_committer_rows_committed", + "Total number of rows sent to the database by this committer", + &["pipeline"], + registry, + ) + .unwrap(), + total_committer_rows_affected: register_int_counter_vec_with_registry!( + "indexer_total_committer_rows_affected", + "Total number of rows actually written to the database by this committer", + &["pipeline"], + registry, + ) + .unwrap(), + 
committer_gather_latency: register_histogram_vec_with_registry!( + "indexer_committer_gather_latency", + "Time taken to gather rows into a batch by this committer", + &["pipeline"], + PROCESSING_LATENCY_SEC_BUCKETS.to_vec(), + registry, + ) + .unwrap(), + committer_commit_latency: register_histogram_vec_with_registry!( + "indexer_committer_commit_latency", + "Time taken to write a batch of rows to the database by this committer", + &["pipeline"], + DB_UPDATE_LATENCY_SEC_BUCKETS.to_vec(), + registry, + ) + .unwrap(), + committer_batch_size: register_histogram_vec_with_registry!( + "indexer_committer_batch_size", + "Number of rows in a batch written to the database by this committer", + &["pipeline"], + BATCH_SIZE_BUCKETS.to_vec(), + registry, + ) + .unwrap(), } } diff --git a/crates/sui-indexer-alt/src/models/checkpoints.rs b/crates/sui-indexer-alt/src/models/checkpoints.rs new file mode 100644 index 0000000000000..7910707d86e8a --- /dev/null +++ b/crates/sui-indexer-alt/src/models/checkpoints.rs @@ -0,0 +1,15 @@ +// Copyright (c) Mysten Labs, Inc. +// SPDX-License-Identifier: Apache-2.0 + +use crate::schema::kv_checkpoints; +use diesel::prelude::*; + +#[derive(Queryable, Insertable, Selectable, Debug, Clone, Default)] +#[diesel(table_name = kv_checkpoints)] +pub struct StoredCheckpoint { + pub sequence_number: i64, + /// BCS serialized CertifiedCheckpointSummary + pub certified_checkpoint: Vec, + /// BCS serialized CheckpointContents + pub checkpoint_contents: Vec, +} diff --git a/crates/sui-indexer-alt/src/models/mod.rs b/crates/sui-indexer-alt/src/models/mod.rs new file mode 100644 index 0000000000000..1af5374622084 --- /dev/null +++ b/crates/sui-indexer-alt/src/models/mod.rs @@ -0,0 +1,4 @@ +// Copyright (c) Mysten Labs, Inc. +// SPDX-License-Identifier: Apache-2.0 + +pub mod checkpoints; From 9ae168b3cf81281550e44f3195b1180d46152614 Mon Sep 17 00:00:00 2001 From: Ashok Menon Date: Tue, 15 Oct 2024 17:59:01 +0100 Subject: [PATCH 18/33] indexer-alt: kv objects pipeline ## Description A pipeline to fill what was previously `full_objects_history`. ## Test plan Pipeline was run locally. 
--- .../2024-10-15-143704_objects/down.sql | 1 + .../2024-10-15-143704_objects/up.sql | 7 ++ .../src/handlers/kv_objects.rs | 67 +++++++++++++++++++ crates/sui-indexer-alt/src/handlers/mod.rs | 1 + crates/sui-indexer-alt/src/main.rs | 19 +++++- .../sui-indexer-alt/src/models/checkpoints.rs | 2 +- crates/sui-indexer-alt/src/models/mod.rs | 1 + crates/sui-indexer-alt/src/models/objects.rs | 13 ++++ crates/sui-indexer-alt/src/schema.rs | 13 ++++ 9 files changed, 121 insertions(+), 3 deletions(-) create mode 100644 crates/sui-indexer-alt/migrations/2024-10-15-143704_objects/down.sql create mode 100644 crates/sui-indexer-alt/migrations/2024-10-15-143704_objects/up.sql create mode 100644 crates/sui-indexer-alt/src/handlers/kv_objects.rs create mode 100644 crates/sui-indexer-alt/src/models/objects.rs diff --git a/crates/sui-indexer-alt/migrations/2024-10-15-143704_objects/down.sql b/crates/sui-indexer-alt/migrations/2024-10-15-143704_objects/down.sql new file mode 100644 index 0000000000000..5d09c2f77e34c --- /dev/null +++ b/crates/sui-indexer-alt/migrations/2024-10-15-143704_objects/down.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS kv_objects; diff --git a/crates/sui-indexer-alt/migrations/2024-10-15-143704_objects/up.sql b/crates/sui-indexer-alt/migrations/2024-10-15-143704_objects/up.sql new file mode 100644 index 0000000000000..471144af9840e --- /dev/null +++ b/crates/sui-indexer-alt/migrations/2024-10-15-143704_objects/up.sql @@ -0,0 +1,7 @@ +CREATE TABLE IF NOT EXISTS kv_objects +( + object_id bytea NOT NULL, + object_version bigint NOT NULL, + serialized_object bytea, + PRIMARY KEY (object_id, object_version) +); diff --git a/crates/sui-indexer-alt/src/handlers/kv_objects.rs b/crates/sui-indexer-alt/src/handlers/kv_objects.rs new file mode 100644 index 0000000000000..6eeaecea8c9d2 --- /dev/null +++ b/crates/sui-indexer-alt/src/handlers/kv_objects.rs @@ -0,0 +1,67 @@ +// Copyright (c) Mysten Labs, Inc. +// SPDX-License-Identifier: Apache-2.0 + +use std::sync::Arc; + +use anyhow::{Context, Result}; +use diesel_async::RunQueryDsl; +use sui_types::full_checkpoint_content::CheckpointData; + +use crate::{db, models::objects::StoredObject, schema::kv_objects}; + +use super::Handler; + +pub struct KvObjects; + +#[async_trait::async_trait] +impl Handler for KvObjects { + const NAME: &'static str = "kv_objects"; + + const BATCH_SIZE: usize = 100; + const CHUNK_SIZE: usize = 1000; + const MAX_PENDING_SIZE: usize = 10000; + + type Value = StoredObject; + + fn handle(checkpoint: &Arc) -> Result> { + let deleted_objects = checkpoint + .eventually_removed_object_refs_post_version() + .into_iter() + .map(|(id, version, _)| { + Ok(StoredObject { + object_id: id.to_vec(), + object_version: version.value() as i64, + serialized_object: None, + }) + }); + + let created_objects = + checkpoint + .transactions + .iter() + .flat_map(|txn| txn.output_objects.iter()) + .map(|o| { + let id = o.id(); + let version = o.version().value(); + Ok(StoredObject { + object_id: id.to_vec(), + object_version: version as i64, + serialized_object: Some(bcs::to_bytes(o).with_context(|| { + format!("Serializing object {id} version {version}") + })?), + }) + }); + + deleted_objects + .chain(created_objects) + .collect::, _>>() + } + + async fn commit(values: &[Self::Value], conn: &mut db::Connection<'_>) -> Result { + Ok(diesel::insert_into(kv_objects::table) + .values(values) + .on_conflict_do_nothing() + .execute(conn) + .await?) 
+ } +} diff --git a/crates/sui-indexer-alt/src/handlers/mod.rs b/crates/sui-indexer-alt/src/handlers/mod.rs index 5bfc7bb0fd039..4439c3df79421 100644 --- a/crates/sui-indexer-alt/src/handlers/mod.rs +++ b/crates/sui-indexer-alt/src/handlers/mod.rs @@ -21,6 +21,7 @@ use crate::{ }; pub mod kv_checkpoints; +pub mod kv_objects; /// Extra buffer added to the channel between the handler and the committer. There does not need to /// be a huge capacity here because the committer is already buffering rows to insert internally. diff --git a/crates/sui-indexer-alt/src/main.rs b/crates/sui-indexer-alt/src/main.rs index d83427b81349a..32ab977bfbd2e 100644 --- a/crates/sui-indexer-alt/src/main.rs +++ b/crates/sui-indexer-alt/src/main.rs @@ -6,7 +6,7 @@ use clap::Parser; use sui_indexer_alt::{ args::Args, db::Db, - handlers::{kv_checkpoints::KvCheckpoints, pipeline}, + handlers::{kv_checkpoints::KvCheckpoints, kv_objects::KvObjects, pipeline}, ingestion::IngestionService, metrics::MetricsService, task::graceful_shutdown, @@ -46,6 +46,14 @@ async fn main() -> Result<()> { cancel.clone(), ); + let (h_obj_handler, h_obj_committer) = pipeline::( + db.clone(), + ingestion_service.subscribe(), + args.committer.clone(), + metrics.clone(), + cancel.clone(), + ); + let h_ingestion = ingestion_service .run() .await @@ -54,7 +62,14 @@ async fn main() -> Result<()> { // Once we receive a Ctrl-C or one of the services panics or is cancelled, notify all services // to shutdown, and wait for them to finish. graceful_shutdown( - [h_cp_handler, h_cp_committer, h_metrics, h_ingestion], + [ + h_cp_handler, + h_cp_committer, + h_obj_handler, + h_obj_committer, + h_metrics, + h_ingestion, + ], cancel, ) .await; diff --git a/crates/sui-indexer-alt/src/models/checkpoints.rs b/crates/sui-indexer-alt/src/models/checkpoints.rs index 7910707d86e8a..5849e2198364c 100644 --- a/crates/sui-indexer-alt/src/models/checkpoints.rs +++ b/crates/sui-indexer-alt/src/models/checkpoints.rs @@ -4,7 +4,7 @@ use crate::schema::kv_checkpoints; use diesel::prelude::*; -#[derive(Queryable, Insertable, Selectable, Debug, Clone, Default)] +#[derive(Insertable, Debug, Clone)] #[diesel(table_name = kv_checkpoints)] pub struct StoredCheckpoint { pub sequence_number: i64, diff --git a/crates/sui-indexer-alt/src/models/mod.rs b/crates/sui-indexer-alt/src/models/mod.rs index 1af5374622084..2f8b37339a8e7 100644 --- a/crates/sui-indexer-alt/src/models/mod.rs +++ b/crates/sui-indexer-alt/src/models/mod.rs @@ -2,3 +2,4 @@ // SPDX-License-Identifier: Apache-2.0 pub mod checkpoints; +pub mod objects; diff --git a/crates/sui-indexer-alt/src/models/objects.rs b/crates/sui-indexer-alt/src/models/objects.rs new file mode 100644 index 0000000000000..e1d793c2eb0ac --- /dev/null +++ b/crates/sui-indexer-alt/src/models/objects.rs @@ -0,0 +1,13 @@ +// Copyright (c) Mysten Labs, Inc. +// SPDX-License-Identifier: Apache-2.0 + +use crate::schema::kv_objects; +use diesel::prelude::*; + +#[derive(Insertable, Debug, Clone)] +#[diesel(table_name = kv_objects, primary_key(object_id, object_version))] +pub struct StoredObject { + pub object_id: Vec, + pub object_version: i64, + pub serialized_object: Option>, +} diff --git a/crates/sui-indexer-alt/src/schema.rs b/crates/sui-indexer-alt/src/schema.rs index ffaf45f9bc6eb..3d934b1cb40de 100644 --- a/crates/sui-indexer-alt/src/schema.rs +++ b/crates/sui-indexer-alt/src/schema.rs @@ -9,3 +9,16 @@ diesel::table! { checkpoint_contents -> Bytea, } } + +diesel::table! 
{ + kv_objects (object_id, object_version) { + object_id -> Bytea, + object_version -> Int8, + serialized_object -> Nullable, + } +} + +diesel::allow_tables_to_appear_in_same_query!( + kv_checkpoints, + kv_objects, +); From 2336c918c3cc9d7d7aebc4e0a158c08724ee1145 Mon Sep 17 00:00:00 2001 From: Ashok Menon Date: Wed, 16 Oct 2024 01:09:15 +0100 Subject: [PATCH 19/33] indexer-alt: kv transactions pipeline ## Description Port ove an idealised KV transactions table: - Removes the following fields: - `transaction_digest` - `transaction_kind` - `success_command_count` - Standardise naming for `tx_sequence_number` and `cp_sequence_number`. - Standardise representation of arrays of BCS encoded values (use a BCS-encoded array instead). So that in the end, the schema matches the schema we would eventually store in the KV sore. This PR also re-implements balance change calculation (rather than relying on the implementation in `sui-json-rpc`). ## Test plan Pipeline run locally. --- .../2024-10-15-170316_transactions/down.sql | 1 + .../2024-10-15-170316_transactions/up.sql | 15 +++ .../src/handlers/kv_transactions.rs | 116 ++++++++++++++++++ crates/sui-indexer-alt/src/handlers/mod.rs | 1 + crates/sui-indexer-alt/src/main.rs | 15 ++- crates/sui-indexer-alt/src/models/mod.rs | 1 + .../src/models/transactions.rs | 35 ++++++ crates/sui-indexer-alt/src/schema.rs | 15 ++- crates/sui-json-rpc/src/balance_changes.rs | 2 +- crates/sui-types/src/coin.rs | 21 ++-- 10 files changed, 207 insertions(+), 15 deletions(-) create mode 100644 crates/sui-indexer-alt/migrations/2024-10-15-170316_transactions/down.sql create mode 100644 crates/sui-indexer-alt/migrations/2024-10-15-170316_transactions/up.sql create mode 100644 crates/sui-indexer-alt/src/handlers/kv_transactions.rs create mode 100644 crates/sui-indexer-alt/src/models/transactions.rs diff --git a/crates/sui-indexer-alt/migrations/2024-10-15-170316_transactions/down.sql b/crates/sui-indexer-alt/migrations/2024-10-15-170316_transactions/down.sql new file mode 100644 index 0000000000000..fa46db0f19143 --- /dev/null +++ b/crates/sui-indexer-alt/migrations/2024-10-15-170316_transactions/down.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS kv_transactions; diff --git a/crates/sui-indexer-alt/migrations/2024-10-15-170316_transactions/up.sql b/crates/sui-indexer-alt/migrations/2024-10-15-170316_transactions/up.sql new file mode 100644 index 0000000000000..cdc00286526bc --- /dev/null +++ b/crates/sui-indexer-alt/migrations/2024-10-15-170316_transactions/up.sql @@ -0,0 +1,15 @@ +CREATE TABLE IF NOT EXISTS kv_transactions +( + tx_sequence_number BIGINT NOT NULL, + cp_sequence_number BIGINT NOT NULL, + timestamp_ms BIGINT NOT NULL, + -- BCS serialized TransactionData + raw_transaction BYTEA NOT NULL, + -- BCS serialized TransactionEffects + raw_effects BYTEA NOT NULL, + -- BCS serialized array of Events + events BYTEA NOT NULL, + -- BCS serialized array of BalanceChanges + balance_changes BYTEA NOT NULL, + PRIMARY KEY (tx_sequence_number) +); diff --git a/crates/sui-indexer-alt/src/handlers/kv_transactions.rs b/crates/sui-indexer-alt/src/handlers/kv_transactions.rs new file mode 100644 index 0000000000000..7faaff020c55a --- /dev/null +++ b/crates/sui-indexer-alt/src/handlers/kv_transactions.rs @@ -0,0 +1,116 @@ +// Copyright (c) Mysten Labs, Inc. 
+// SPDX-License-Identifier: Apache-2.0 + +use std::{collections::BTreeMap, sync::Arc}; + +use anyhow::{Context, Result}; +use diesel_async::RunQueryDsl; +use sui_types::{ + coin::Coin, + effects::TransactionEffectsAPI, + full_checkpoint_content::{CheckpointData, CheckpointTransaction}, + gas_coin::GAS, +}; + +use crate::{ + db, + models::transactions::{StoredBalanceChange, StoredTransaction}, + schema::kv_transactions, +}; + +use super::Handler; + +pub struct KvTransactions; + +#[async_trait::async_trait] +impl Handler for KvTransactions { + const NAME: &'static str = "kv_transactions"; + + const BATCH_SIZE: usize = 100; + const CHUNK_SIZE: usize = 1000; + const MAX_PENDING_SIZE: usize = 10000; + + type Value = StoredTransaction; + + fn handle(checkpoint: &Arc) -> Result> { + let CheckpointData { + transactions, + checkpoint_summary, + .. + } = checkpoint.as_ref(); + + let mut values = Vec::with_capacity(transactions.len()); + let first_tx = checkpoint_summary.network_total_transactions as usize - transactions.len(); + + for (i, tx) in transactions.iter().enumerate() { + let tx_sequence_number = (first_tx + i) as i64; + let transaction = &tx.transaction.data().intent_message().value; + let effects = &tx.effects; + let events: Vec<_> = tx.events.iter().flat_map(|e| e.data.iter()).collect(); + let balance_changes = balance_changes(tx).with_context(|| { + format!("Calculating balance changes for transaction {tx_sequence_number}") + })?; + + values.push(StoredTransaction { + tx_sequence_number: (first_tx + i) as i64, + cp_sequence_number: checkpoint_summary.sequence_number as i64, + timestamp_ms: checkpoint_summary.timestamp_ms as i64, + raw_transaction: bcs::to_bytes(transaction) + .with_context(|| format!("Serializing transaction {tx_sequence_number}"))?, + raw_effects: bcs::to_bytes(effects).with_context(|| { + format!("Serializing effects for transaction {tx_sequence_number}") + })?, + events: bcs::to_bytes(&events).with_context(|| { + format!("Serializing events for transaction {tx_sequence_number}") + })?, + balance_changes: bcs::to_bytes(&balance_changes).with_context(|| { + format!("Serializing balance changes for transaction {tx_sequence_number}") + })?, + }); + } + + Ok(values) + } + + async fn commit(values: &[Self::Value], conn: &mut db::Connection<'_>) -> Result { + Ok(diesel::insert_into(kv_transactions::table) + .values(values) + .on_conflict_do_nothing() + .execute(conn) + .await?) + } +} + +/// Calculate balance changes based on the object's input and output objects. +fn balance_changes(transaction: &CheckpointTransaction) -> Result> { + // Shortcut if the transaction failed -- we know that only gas was charged. + if transaction.effects.status().is_err() { + return Ok(vec![StoredBalanceChange { + owner: transaction.effects.gas_object().1, + coin_type: GAS::type_tag().to_canonical_string(/* with_prefix */ true), + amount: -(transaction.effects.gas_cost_summary().net_gas_usage() as i128), + }]); + } + + let mut changes = BTreeMap::new(); + for object in &transaction.input_objects { + if let Some((type_, balance)) = Coin::extract_balance_if_coin(object)? { + *changes.entry((object.owner(), type_)).or_insert(0i128) -= balance as i128; + } + } + + for object in &transaction.output_objects { + if let Some((type_, balance)) = Coin::extract_balance_if_coin(object)? 
{ + *changes.entry((object.owner(), type_)).or_insert(0i128) += balance as i128; + } + } + + Ok(changes + .into_iter() + .map(|((owner, coin_type), amount)| StoredBalanceChange { + owner: *owner, + coin_type: coin_type.to_canonical_string(/* with_prefix */ true), + amount, + }) + .collect()) +} diff --git a/crates/sui-indexer-alt/src/handlers/mod.rs b/crates/sui-indexer-alt/src/handlers/mod.rs index 4439c3df79421..213fabddd26a4 100644 --- a/crates/sui-indexer-alt/src/handlers/mod.rs +++ b/crates/sui-indexer-alt/src/handlers/mod.rs @@ -22,6 +22,7 @@ use crate::{ pub mod kv_checkpoints; pub mod kv_objects; +pub mod kv_transactions; /// Extra buffer added to the channel between the handler and the committer. There does not need to /// be a huge capacity here because the committer is already buffering rows to insert internally. diff --git a/crates/sui-indexer-alt/src/main.rs b/crates/sui-indexer-alt/src/main.rs index 32ab977bfbd2e..47254eeb44e40 100644 --- a/crates/sui-indexer-alt/src/main.rs +++ b/crates/sui-indexer-alt/src/main.rs @@ -6,7 +6,10 @@ use clap::Parser; use sui_indexer_alt::{ args::Args, db::Db, - handlers::{kv_checkpoints::KvCheckpoints, kv_objects::KvObjects, pipeline}, + handlers::{ + kv_checkpoints::KvCheckpoints, kv_objects::KvObjects, kv_transactions::KvTransactions, + pipeline, + }, ingestion::IngestionService, metrics::MetricsService, task::graceful_shutdown, @@ -54,6 +57,14 @@ async fn main() -> Result<()> { cancel.clone(), ); + let (h_tx_handler, h_tx_committer) = pipeline::( + db.clone(), + ingestion_service.subscribe(), + args.committer.clone(), + metrics.clone(), + cancel.clone(), + ); + let h_ingestion = ingestion_service .run() .await @@ -67,6 +78,8 @@ async fn main() -> Result<()> { h_cp_committer, h_obj_handler, h_obj_committer, + h_tx_handler, + h_tx_committer, h_metrics, h_ingestion, ], diff --git a/crates/sui-indexer-alt/src/models/mod.rs b/crates/sui-indexer-alt/src/models/mod.rs index 2f8b37339a8e7..a5f137fbac1b7 100644 --- a/crates/sui-indexer-alt/src/models/mod.rs +++ b/crates/sui-indexer-alt/src/models/mod.rs @@ -3,3 +3,4 @@ pub mod checkpoints; pub mod objects; +pub mod transactions; diff --git a/crates/sui-indexer-alt/src/models/transactions.rs b/crates/sui-indexer-alt/src/models/transactions.rs new file mode 100644 index 0000000000000..947fba0ed8cfc --- /dev/null +++ b/crates/sui-indexer-alt/src/models/transactions.rs @@ -0,0 +1,35 @@ +// Copyright (c) Mysten Labs, Inc. +// SPDX-License-Identifier: Apache-2.0 + +use crate::schema::kv_transactions; +use diesel::prelude::*; +use serde::{Deserialize, Serialize}; +use sui_types::object::Owner; + +#[derive(Insertable, Debug, Clone)] +#[diesel(table_name = kv_transactions)] +pub struct StoredTransaction { + pub tx_sequence_number: i64, + pub cp_sequence_number: i64, + pub timestamp_ms: i64, + pub raw_transaction: Vec, + pub raw_effects: Vec, + pub events: Vec, + pub balance_changes: Vec, +} + +/// Even though the balance changes are not a protocol structure, they are stored in the database +/// as a BCS-encoded array. This is mainly to keep sizes down, but when stored in the key-value +/// store, balance changes are likely to be JSON encoded. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct StoredBalanceChange { + /// Owner whose balance changed + pub owner: Owner, + + /// Type of the Coin (just the one-time witness type). + pub coin_type: String, + + /// The amount the balance changed by. 
A negative amount means the net flow of value is from + /// the owner, and a positive amount means the net flow of value is to the owner. + pub amount: i128, +} diff --git a/crates/sui-indexer-alt/src/schema.rs b/crates/sui-indexer-alt/src/schema.rs index 3d934b1cb40de..f3775301bfc4e 100644 --- a/crates/sui-indexer-alt/src/schema.rs +++ b/crates/sui-indexer-alt/src/schema.rs @@ -1,5 +1,3 @@ -// Copyright (c) Mysten Labs, Inc. -// SPDX-License-Identifier: Apache-2.0 // @generated automatically by Diesel CLI. diesel::table! { @@ -18,7 +16,20 @@ diesel::table! { } } +diesel::table! { + kv_transactions (tx_sequence_number) { + tx_sequence_number -> Int8, + cp_sequence_number -> Int8, + timestamp_ms -> Int8, + raw_transaction -> Bytea, + raw_effects -> Bytea, + events -> Bytea, + balance_changes -> Bytea, + } +} + diesel::allow_tables_to_appear_in_same_query!( kv_checkpoints, kv_objects, + kv_transactions, ); diff --git a/crates/sui-json-rpc/src/balance_changes.rs b/crates/sui-json-rpc/src/balance_changes.rs index 279f6f83246d2..224224d682150 100644 --- a/crates/sui-json-rpc/src/balance_changes.rs +++ b/crates/sui-json-rpc/src/balance_changes.rs @@ -148,7 +148,7 @@ async fn fetch_coins, E>( o.owner, coin_type, // we know this is a coin, safe to unwrap - Coin::extract_balance_if_coin(&o).unwrap().unwrap(), + Coin::extract_balance_if_coin(&o).unwrap().unwrap().1, )) } } diff --git a/crates/sui-types/src/coin.rs b/crates/sui-types/src/coin.rs index d1f19defc6cc3..6c2fc3abd6ece 100644 --- a/crates/sui-types/src/coin.rs +++ b/crates/sui-types/src/coin.rs @@ -67,18 +67,17 @@ impl Coin { /// If the given object is a Coin, deserialize its contents and extract the balance Ok(Some(u64)). /// If it's not a Coin, return Ok(None). /// The cost is 2 comparisons if not a coin, and deserialization if its a Coin. 
- pub fn extract_balance_if_coin(object: &Object) -> Result, bcs::Error> { - match &object.data { - Data::Move(move_obj) => { - if !move_obj.is_coin() { - return Ok(None); - } + pub fn extract_balance_if_coin(object: &Object) -> Result, bcs::Error> { + let Data::Move(obj) = &object.data else { + return Ok(None); + }; - let coin = Self::from_bcs_bytes(move_obj.contents())?; - Ok(Some(coin.value())) - } - _ => Ok(None), // package - } + let Some(type_) = obj.type_().coin_type_maybe() else { + return Ok(None); + }; + + let coin = Self::from_bcs_bytes(obj.contents())?; + Ok(Some((type_, coin.value()))) } pub fn id(&self) -> &ObjectID { From 42a37b402b10083372d500a576621559dffe1cfd Mon Sep 17 00:00:00 2001 From: Ashok Menon Date: Wed, 16 Oct 2024 01:48:31 +0100 Subject: [PATCH 20/33] indexer-alt: tx affected objects pipeline --- .../down.sql | 1 + .../up.sql | 12 ++++ crates/sui-indexer-alt/src/handlers/mod.rs | 1 + .../src/handlers/tx_affected_objects.rs | 62 +++++++++++++++++++ crates/sui-indexer-alt/src/main.rs | 12 +++- .../src/models/transactions.rs | 34 ++++++---- crates/sui-indexer-alt/src/schema.rs | 11 ++++ 7 files changed, 119 insertions(+), 14 deletions(-) create mode 100644 crates/sui-indexer-alt/migrations/2024-10-16-002409_tx_affected_objects/down.sql create mode 100644 crates/sui-indexer-alt/migrations/2024-10-16-002409_tx_affected_objects/up.sql create mode 100644 crates/sui-indexer-alt/src/handlers/tx_affected_objects.rs diff --git a/crates/sui-indexer-alt/migrations/2024-10-16-002409_tx_affected_objects/down.sql b/crates/sui-indexer-alt/migrations/2024-10-16-002409_tx_affected_objects/down.sql new file mode 100644 index 0000000000000..b0868da73b0f2 --- /dev/null +++ b/crates/sui-indexer-alt/migrations/2024-10-16-002409_tx_affected_objects/down.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS tx_affected_objects; diff --git a/crates/sui-indexer-alt/migrations/2024-10-16-002409_tx_affected_objects/up.sql b/crates/sui-indexer-alt/migrations/2024-10-16-002409_tx_affected_objects/up.sql new file mode 100644 index 0000000000000..8aeb7875689a3 --- /dev/null +++ b/crates/sui-indexer-alt/migrations/2024-10-16-002409_tx_affected_objects/up.sql @@ -0,0 +1,12 @@ +CREATE TABLE IF NOT EXISTS tx_affected_objects ( + tx_sequence_number BIGINT NOT NULL, + affected BYTEA NOT NULL, + sender BYTEA NOT NULL, + PRIMARY KEY(affected, tx_sequence_number) +); + +CREATE INDEX IF NOT EXISTS tx_affected_objects_tx_sequence_number +ON tx_affected_objects (tx_sequence_number); + +CREATE INDEX IF NOT EXISTS tx_affected_objects_sender +ON tx_affected_objects (sender, affected, tx_sequence_number); diff --git a/crates/sui-indexer-alt/src/handlers/mod.rs b/crates/sui-indexer-alt/src/handlers/mod.rs index 213fabddd26a4..e8487268aea5d 100644 --- a/crates/sui-indexer-alt/src/handlers/mod.rs +++ b/crates/sui-indexer-alt/src/handlers/mod.rs @@ -23,6 +23,7 @@ use crate::{ pub mod kv_checkpoints; pub mod kv_objects; pub mod kv_transactions; +pub mod tx_affected_objects; /// Extra buffer added to the channel between the handler and the committer. There does not need to /// be a huge capacity here because the committer is already buffering rows to insert internally. diff --git a/crates/sui-indexer-alt/src/handlers/tx_affected_objects.rs b/crates/sui-indexer-alt/src/handlers/tx_affected_objects.rs new file mode 100644 index 0000000000000..906637d4b2a97 --- /dev/null +++ b/crates/sui-indexer-alt/src/handlers/tx_affected_objects.rs @@ -0,0 +1,62 @@ +// Copyright (c) Mysten Labs, Inc. 
+// SPDX-License-Identifier: Apache-2.0 + +use std::sync::Arc; + +use anyhow::Result; +use diesel_async::RunQueryDsl; +use sui_types::{effects::TransactionEffectsAPI, full_checkpoint_content::CheckpointData}; + +use crate::{db, models::transactions::StoredTxAffectedObjects, schema::tx_affected_objects}; + +use super::Handler; + +pub struct TxAffectedObjects; + +#[async_trait::async_trait] +impl Handler for TxAffectedObjects { + const NAME: &'static str = "tx_affected_objects"; + + const BATCH_SIZE: usize = 100; + const CHUNK_SIZE: usize = 1000; + const MAX_PENDING_SIZE: usize = 10000; + + type Value = StoredTxAffectedObjects; + + fn handle(checkpoint: &Arc) -> Result> { + let CheckpointData { + transactions, + checkpoint_summary, + .. + } = checkpoint.as_ref(); + + let mut values = Vec::new(); + let first_tx = checkpoint_summary.network_total_transactions as usize - transactions.len(); + + for (i, tx) in transactions.iter().enumerate() { + let tx_sequence_number = (first_tx + i) as i64; + let sender = tx.transaction.sender_address(); + + values.extend( + tx.effects + .object_changes() + .iter() + .map(|o| StoredTxAffectedObjects { + tx_sequence_number, + affected: o.id.to_vec(), + sender: sender.to_vec(), + }), + ); + } + + Ok(values) + } + + async fn commit(values: &[Self::Value], conn: &mut db::Connection<'_>) -> Result { + Ok(diesel::insert_into(tx_affected_objects::table) + .values(values) + .on_conflict_do_nothing() + .execute(conn) + .await?) + } +} diff --git a/crates/sui-indexer-alt/src/main.rs b/crates/sui-indexer-alt/src/main.rs index 47254eeb44e40..4914a75ba1711 100644 --- a/crates/sui-indexer-alt/src/main.rs +++ b/crates/sui-indexer-alt/src/main.rs @@ -8,7 +8,7 @@ use sui_indexer_alt::{ db::Db, handlers::{ kv_checkpoints::KvCheckpoints, kv_objects::KvObjects, kv_transactions::KvTransactions, - pipeline, + pipeline, tx_affected_objects::TxAffectedObjects, }, ingestion::IngestionService, metrics::MetricsService, @@ -65,6 +65,14 @@ async fn main() -> Result<()> { cancel.clone(), ); + let (h_tx_objs_handler, h_tx_objs_committer) = pipeline::( + db.clone(), + ingestion_service.subscribe(), + args.committer.clone(), + metrics.clone(), + cancel.clone(), + ); + let h_ingestion = ingestion_service .run() .await @@ -80,6 +88,8 @@ async fn main() -> Result<()> { h_obj_committer, h_tx_handler, h_tx_committer, + h_tx_objs_handler, + h_tx_objs_committer, h_metrics, h_ingestion, ], diff --git a/crates/sui-indexer-alt/src/models/transactions.rs b/crates/sui-indexer-alt/src/models/transactions.rs index 947fba0ed8cfc..2793b006ad95d 100644 --- a/crates/sui-indexer-alt/src/models/transactions.rs +++ b/crates/sui-indexer-alt/src/models/transactions.rs @@ -1,23 +1,11 @@ // Copyright (c) Mysten Labs, Inc. // SPDX-License-Identifier: Apache-2.0 -use crate::schema::kv_transactions; +use crate::schema::{kv_transactions, tx_affected_objects}; use diesel::prelude::*; use serde::{Deserialize, Serialize}; use sui_types::object::Owner; -#[derive(Insertable, Debug, Clone)] -#[diesel(table_name = kv_transactions)] -pub struct StoredTransaction { - pub tx_sequence_number: i64, - pub cp_sequence_number: i64, - pub timestamp_ms: i64, - pub raw_transaction: Vec, - pub raw_effects: Vec, - pub events: Vec, - pub balance_changes: Vec, -} - /// Even though the balance changes are not a protocol structure, they are stored in the database /// as a BCS-encoded array. This is mainly to keep sizes down, but when stored in the key-value /// store, balance changes are likely to be JSON encoded. 
@@ -33,3 +21,23 @@ pub struct StoredBalanceChange { /// the owner, and a positive amount means the net flow of value is to the owner. pub amount: i128, } + +#[derive(Insertable, Debug, Clone)] +#[diesel(table_name = kv_transactions)] +pub struct StoredTransaction { + pub tx_sequence_number: i64, + pub cp_sequence_number: i64, + pub timestamp_ms: i64, + pub raw_transaction: Vec, + pub raw_effects: Vec, + pub events: Vec, + pub balance_changes: Vec, +} + +#[derive(Insertable, Debug, Clone)] +#[diesel(table_name = tx_affected_objects)] +pub struct StoredTxAffectedObjects { + pub tx_sequence_number: i64, + pub affected: Vec, + pub sender: Vec, +} diff --git a/crates/sui-indexer-alt/src/schema.rs b/crates/sui-indexer-alt/src/schema.rs index f3775301bfc4e..ac2b96ada96f9 100644 --- a/crates/sui-indexer-alt/src/schema.rs +++ b/crates/sui-indexer-alt/src/schema.rs @@ -1,3 +1,5 @@ +// Copyright (c) Mysten Labs, Inc. +// SPDX-License-Identifier: Apache-2.0 // @generated automatically by Diesel CLI. diesel::table! { @@ -28,8 +30,17 @@ diesel::table! { } } +diesel::table! { + tx_affected_objects (affected, tx_sequence_number) { + tx_sequence_number -> Int8, + affected -> Bytea, + sender -> Bytea, + } +} + diesel::allow_tables_to_appear_in_same_query!( kv_checkpoints, kv_objects, kv_transactions, + tx_affected_objects, ); From 9bbb7e49b35df1d072a246e148150edb436cc1c9 Mon Sep 17 00:00:00 2001 From: Ashok Menon Date: Wed, 16 Oct 2024 22:41:17 +0100 Subject: [PATCH 21/33] indexer-alt: split out tx balance changes --- .../2024-10-15-170316_transactions/up.sql | 7 +- .../down.sql | 1 + .../up.sql | 6 + .../src/handlers/kv_transactions.rs | 55 +--------- crates/sui-indexer-alt/src/handlers/mod.rs | 1 + .../src/handlers/tx_balance_changes.rs | 103 ++++++++++++++++++ crates/sui-indexer-alt/src/main.rs | 12 +- .../src/models/transactions.rs | 31 +++--- crates/sui-indexer-alt/src/schema.rs | 9 +- 9 files changed, 153 insertions(+), 72 deletions(-) create mode 100644 crates/sui-indexer-alt/migrations/2024-10-16-211445_tx_balance_changes/down.sql create mode 100644 crates/sui-indexer-alt/migrations/2024-10-16-211445_tx_balance_changes/up.sql create mode 100644 crates/sui-indexer-alt/src/handlers/tx_balance_changes.rs diff --git a/crates/sui-indexer-alt/migrations/2024-10-15-170316_transactions/up.sql b/crates/sui-indexer-alt/migrations/2024-10-15-170316_transactions/up.sql index cdc00286526bc..394da91909d72 100644 --- a/crates/sui-indexer-alt/migrations/2024-10-15-170316_transactions/up.sql +++ b/crates/sui-indexer-alt/migrations/2024-10-15-170316_transactions/up.sql @@ -1,6 +1,6 @@ CREATE TABLE IF NOT EXISTS kv_transactions ( - tx_sequence_number BIGINT NOT NULL, + tx_sequence_number BIGINT PRIMARY KEY, cp_sequence_number BIGINT NOT NULL, timestamp_ms BIGINT NOT NULL, -- BCS serialized TransactionData @@ -8,8 +8,5 @@ CREATE TABLE IF NOT EXISTS kv_transactions -- BCS serialized TransactionEffects raw_effects BYTEA NOT NULL, -- BCS serialized array of Events - events BYTEA NOT NULL, - -- BCS serialized array of BalanceChanges - balance_changes BYTEA NOT NULL, - PRIMARY KEY (tx_sequence_number) + events BYTEA NOT NULL ); diff --git a/crates/sui-indexer-alt/migrations/2024-10-16-211445_tx_balance_changes/down.sql b/crates/sui-indexer-alt/migrations/2024-10-16-211445_tx_balance_changes/down.sql new file mode 100644 index 0000000000000..e36b0a7736cc2 --- /dev/null +++ b/crates/sui-indexer-alt/migrations/2024-10-16-211445_tx_balance_changes/down.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS tx_balance_changes; diff --git 
a/crates/sui-indexer-alt/migrations/2024-10-16-211445_tx_balance_changes/up.sql b/crates/sui-indexer-alt/migrations/2024-10-16-211445_tx_balance_changes/up.sql new file mode 100644 index 0000000000000..790c5aa14d543 --- /dev/null +++ b/crates/sui-indexer-alt/migrations/2024-10-16-211445_tx_balance_changes/up.sql @@ -0,0 +1,6 @@ +CREATE TABLE IF NOT EXISTS tx_balance_changes +( + tx_sequence_number BIGINT PRIMARY KEY, + -- BCS serialized array of BalanceChanges + balance_changes BYTEA NOT NULL +); diff --git a/crates/sui-indexer-alt/src/handlers/kv_transactions.rs b/crates/sui-indexer-alt/src/handlers/kv_transactions.rs index 7faaff020c55a..55371f50fac5a 100644 --- a/crates/sui-indexer-alt/src/handlers/kv_transactions.rs +++ b/crates/sui-indexer-alt/src/handlers/kv_transactions.rs @@ -1,22 +1,13 @@ // Copyright (c) Mysten Labs, Inc. // SPDX-License-Identifier: Apache-2.0 -use std::{collections::BTreeMap, sync::Arc}; +use std::sync::Arc; use anyhow::{Context, Result}; use diesel_async::RunQueryDsl; -use sui_types::{ - coin::Coin, - effects::TransactionEffectsAPI, - full_checkpoint_content::{CheckpointData, CheckpointTransaction}, - gas_coin::GAS, -}; +use sui_types::full_checkpoint_content::CheckpointData; -use crate::{ - db, - models::transactions::{StoredBalanceChange, StoredTransaction}, - schema::kv_transactions, -}; +use crate::{db, models::transactions::StoredTransaction, schema::kv_transactions}; use super::Handler; @@ -47,9 +38,6 @@ impl Handler for KvTransactions { let transaction = &tx.transaction.data().intent_message().value; let effects = &tx.effects; let events: Vec<_> = tx.events.iter().flat_map(|e| e.data.iter()).collect(); - let balance_changes = balance_changes(tx).with_context(|| { - format!("Calculating balance changes for transaction {tx_sequence_number}") - })?; values.push(StoredTransaction { tx_sequence_number: (first_tx + i) as i64, @@ -63,9 +51,6 @@ impl Handler for KvTransactions { events: bcs::to_bytes(&events).with_context(|| { format!("Serializing events for transaction {tx_sequence_number}") })?, - balance_changes: bcs::to_bytes(&balance_changes).with_context(|| { - format!("Serializing balance changes for transaction {tx_sequence_number}") - })?, }); } @@ -80,37 +65,3 @@ impl Handler for KvTransactions { .await?) } } - -/// Calculate balance changes based on the object's input and output objects. -fn balance_changes(transaction: &CheckpointTransaction) -> Result> { - // Shortcut if the transaction failed -- we know that only gas was charged. - if transaction.effects.status().is_err() { - return Ok(vec![StoredBalanceChange { - owner: transaction.effects.gas_object().1, - coin_type: GAS::type_tag().to_canonical_string(/* with_prefix */ true), - amount: -(transaction.effects.gas_cost_summary().net_gas_usage() as i128), - }]); - } - - let mut changes = BTreeMap::new(); - for object in &transaction.input_objects { - if let Some((type_, balance)) = Coin::extract_balance_if_coin(object)? { - *changes.entry((object.owner(), type_)).or_insert(0i128) -= balance as i128; - } - } - - for object in &transaction.output_objects { - if let Some((type_, balance)) = Coin::extract_balance_if_coin(object)? 
{ - *changes.entry((object.owner(), type_)).or_insert(0i128) += balance as i128; - } - } - - Ok(changes - .into_iter() - .map(|((owner, coin_type), amount)| StoredBalanceChange { - owner: *owner, - coin_type: coin_type.to_canonical_string(/* with_prefix */ true), - amount, - }) - .collect()) -} diff --git a/crates/sui-indexer-alt/src/handlers/mod.rs b/crates/sui-indexer-alt/src/handlers/mod.rs index e8487268aea5d..ced26a6c63a7d 100644 --- a/crates/sui-indexer-alt/src/handlers/mod.rs +++ b/crates/sui-indexer-alt/src/handlers/mod.rs @@ -24,6 +24,7 @@ pub mod kv_checkpoints; pub mod kv_objects; pub mod kv_transactions; pub mod tx_affected_objects; +pub mod tx_balance_changes; /// Extra buffer added to the channel between the handler and the committer. There does not need to /// be a huge capacity here because the committer is already buffering rows to insert internally. diff --git a/crates/sui-indexer-alt/src/handlers/tx_balance_changes.rs b/crates/sui-indexer-alt/src/handlers/tx_balance_changes.rs new file mode 100644 index 0000000000000..f23e25dae71ec --- /dev/null +++ b/crates/sui-indexer-alt/src/handlers/tx_balance_changes.rs @@ -0,0 +1,103 @@ +// Copyright (c) Mysten Labs, Inc. +// SPDX-License-Identifier: Apache-2.0 + +use std::{collections::BTreeMap, sync::Arc}; + +use anyhow::{Context, Result}; +use diesel_async::RunQueryDsl; +use sui_types::{ + coin::Coin, + effects::TransactionEffectsAPI, + full_checkpoint_content::{CheckpointData, CheckpointTransaction}, + gas_coin::GAS, +}; + +use crate::{ + db, + models::transactions::{BalanceChange, StoredTxBalanceChange}, + schema::tx_balance_changes, +}; + +use super::Handler; + +pub struct TxBalanceChanges; + +#[async_trait::async_trait] +impl Handler for TxBalanceChanges { + const NAME: &'static str = "tx_balance_changes"; + + const BATCH_SIZE: usize = 100; + const CHUNK_SIZE: usize = 1000; + const MAX_PENDING_SIZE: usize = 10000; + + type Value = StoredTxBalanceChange; + + fn handle(checkpoint: &Arc) -> Result> { + let CheckpointData { + transactions, + checkpoint_summary, + .. + } = checkpoint.as_ref(); + + let mut values = Vec::new(); + let first_tx = checkpoint_summary.network_total_transactions as usize - transactions.len(); + + for (i, tx) in transactions.iter().enumerate() { + let tx_sequence_number = (first_tx + i) as i64; + let balance_changes = balance_changes(tx).with_context(|| { + format!("Calculating balance changes for transaction {tx_sequence_number}") + })?; + + values.push(StoredTxBalanceChange { + tx_sequence_number, + balance_changes: bcs::to_bytes(&balance_changes).with_context(|| { + format!("Serializing balance changes for transaction {tx_sequence_number}") + })?, + }); + } + + Ok(values) + } + + async fn commit(values: &[Self::Value], conn: &mut db::Connection<'_>) -> Result { + Ok(diesel::insert_into(tx_balance_changes::table) + .values(values) + .on_conflict_do_nothing() + .execute(conn) + .await?) + } +} + +/// Calculate balance changes based on the object's input and output objects. +fn balance_changes(transaction: &CheckpointTransaction) -> Result> { + // Shortcut if the transaction failed -- we know that only gas was charged. 
+ if transaction.effects.status().is_err() { + return Ok(vec![BalanceChange::V1 { + owner: transaction.effects.gas_object().1, + coin_type: GAS::type_tag().to_canonical_string(/* with_prefix */ true), + amount: -(transaction.effects.gas_cost_summary().net_gas_usage() as i128), + }]); + } + + let mut changes = BTreeMap::new(); + for object in &transaction.input_objects { + if let Some((type_, balance)) = Coin::extract_balance_if_coin(object)? { + *changes.entry((object.owner(), type_)).or_insert(0i128) -= balance as i128; + } + } + + for object in &transaction.output_objects { + if let Some((type_, balance)) = Coin::extract_balance_if_coin(object)? { + *changes.entry((object.owner(), type_)).or_insert(0i128) += balance as i128; + } + } + + Ok(changes + .into_iter() + .map(|((owner, coin_type), amount)| BalanceChange::V1 { + owner: *owner, + coin_type: coin_type.to_canonical_string(/* with_prefix */ true), + amount, + }) + .collect()) +} diff --git a/crates/sui-indexer-alt/src/main.rs b/crates/sui-indexer-alt/src/main.rs index 4914a75ba1711..87b6e341a2eda 100644 --- a/crates/sui-indexer-alt/src/main.rs +++ b/crates/sui-indexer-alt/src/main.rs @@ -8,7 +8,7 @@ use sui_indexer_alt::{ db::Db, handlers::{ kv_checkpoints::KvCheckpoints, kv_objects::KvObjects, kv_transactions::KvTransactions, - pipeline, tx_affected_objects::TxAffectedObjects, + pipeline, tx_affected_objects::TxAffectedObjects, tx_balance_changes::TxBalanceChanges, }, ingestion::IngestionService, metrics::MetricsService, @@ -73,6 +73,14 @@ async fn main() -> Result<()> { cancel.clone(), ); + let (h_tx_bal_handler, h_tx_bal_committer) = pipeline::( + db.clone(), + ingestion_service.subscribe(), + args.committer.clone(), + metrics.clone(), + cancel.clone(), + ); + let h_ingestion = ingestion_service .run() .await @@ -90,6 +98,8 @@ async fn main() -> Result<()> { h_tx_committer, h_tx_objs_handler, h_tx_objs_committer, + h_tx_bal_handler, + h_tx_bal_committer, h_metrics, h_ingestion, ], diff --git a/crates/sui-indexer-alt/src/models/transactions.rs b/crates/sui-indexer-alt/src/models/transactions.rs index 2793b006ad95d..044daef779ad7 100644 --- a/crates/sui-indexer-alt/src/models/transactions.rs +++ b/crates/sui-indexer-alt/src/models/transactions.rs @@ -1,25 +1,24 @@ // Copyright (c) Mysten Labs, Inc. // SPDX-License-Identifier: Apache-2.0 -use crate::schema::{kv_transactions, tx_affected_objects}; +use crate::schema::{kv_transactions, tx_affected_objects, tx_balance_changes}; use diesel::prelude::*; use serde::{Deserialize, Serialize}; use sui_types::object::Owner; -/// Even though the balance changes are not a protocol structure, they are stored in the database -/// as a BCS-encoded array. This is mainly to keep sizes down, but when stored in the key-value -/// store, balance changes are likely to be JSON encoded. #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct StoredBalanceChange { - /// Owner whose balance changed - pub owner: Owner, +pub enum BalanceChange { + V1 { + /// Owner whose balance changed + owner: Owner, - /// Type of the Coin (just the one-time witness type). - pub coin_type: String, + /// Type of the Coin (just the one-time witness type). + coin_type: String, - /// The amount the balance changed by. A negative amount means the net flow of value is from - /// the owner, and a positive amount means the net flow of value is to the owner. - pub amount: i128, + /// The amount the balance changed by. 
A negative amount means the net flow of value is + /// from the owner, and a positive amount means the net flow of value is to the owner. + amount: i128, + }, } #[derive(Insertable, Debug, Clone)] @@ -31,7 +30,6 @@ pub struct StoredTransaction { pub raw_transaction: Vec, pub raw_effects: Vec, pub events: Vec, - pub balance_changes: Vec, } #[derive(Insertable, Debug, Clone)] @@ -41,3 +39,10 @@ pub struct StoredTxAffectedObjects { pub affected: Vec, pub sender: Vec, } + +#[derive(Insertable, Debug, Clone)] +#[diesel(table_name = tx_balance_changes)] +pub struct StoredTxBalanceChange { + pub tx_sequence_number: i64, + pub balance_changes: Vec, +} diff --git a/crates/sui-indexer-alt/src/schema.rs b/crates/sui-indexer-alt/src/schema.rs index ac2b96ada96f9..8404c4b3041fa 100644 --- a/crates/sui-indexer-alt/src/schema.rs +++ b/crates/sui-indexer-alt/src/schema.rs @@ -26,7 +26,6 @@ diesel::table! { raw_transaction -> Bytea, raw_effects -> Bytea, events -> Bytea, - balance_changes -> Bytea, } } @@ -38,9 +37,17 @@ diesel::table! { } } +diesel::table! { + tx_balance_changes (tx_sequence_number) { + tx_sequence_number -> Int8, + balance_changes -> Bytea, + } +} + diesel::allow_tables_to_appear_in_same_query!( kv_checkpoints, kv_objects, kv_transactions, tx_affected_objects, + tx_balance_changes, ); From 0215f22bd3a3e1ade1e02cc48245fa194a5df30d Mon Sep 17 00:00:00 2001 From: Ashok Menon Date: Wed, 16 Oct 2024 22:42:01 +0100 Subject: [PATCH 22/33] chore(indexer-alt): unpluralize StoredTxAffectedObjects --- crates/sui-indexer-alt/src/handlers/tx_affected_objects.rs | 6 +++--- crates/sui-indexer-alt/src/models/transactions.rs | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/sui-indexer-alt/src/handlers/tx_affected_objects.rs b/crates/sui-indexer-alt/src/handlers/tx_affected_objects.rs index 906637d4b2a97..b8faa18a52ae4 100644 --- a/crates/sui-indexer-alt/src/handlers/tx_affected_objects.rs +++ b/crates/sui-indexer-alt/src/handlers/tx_affected_objects.rs @@ -7,7 +7,7 @@ use anyhow::Result; use diesel_async::RunQueryDsl; use sui_types::{effects::TransactionEffectsAPI, full_checkpoint_content::CheckpointData}; -use crate::{db, models::transactions::StoredTxAffectedObjects, schema::tx_affected_objects}; +use crate::{db, models::transactions::StoredTxAffectedObject, schema::tx_affected_objects}; use super::Handler; @@ -21,7 +21,7 @@ impl Handler for TxAffectedObjects { const CHUNK_SIZE: usize = 1000; const MAX_PENDING_SIZE: usize = 10000; - type Value = StoredTxAffectedObjects; + type Value = StoredTxAffectedObject; fn handle(checkpoint: &Arc) -> Result> { let CheckpointData { @@ -41,7 +41,7 @@ impl Handler for TxAffectedObjects { tx.effects .object_changes() .iter() - .map(|o| StoredTxAffectedObjects { + .map(|o| StoredTxAffectedObject { tx_sequence_number, affected: o.id.to_vec(), sender: sender.to_vec(), diff --git a/crates/sui-indexer-alt/src/models/transactions.rs b/crates/sui-indexer-alt/src/models/transactions.rs index 044daef779ad7..4c505fb812d5e 100644 --- a/crates/sui-indexer-alt/src/models/transactions.rs +++ b/crates/sui-indexer-alt/src/models/transactions.rs @@ -34,7 +34,7 @@ pub struct StoredTransaction { #[derive(Insertable, Debug, Clone)] #[diesel(table_name = tx_affected_objects)] -pub struct StoredTxAffectedObjects { +pub struct StoredTxAffectedObject { pub tx_sequence_number: i64, pub affected: Vec, pub sender: Vec, From e46025c7f4232d91a1109b5e4eb004c1c3b87a21 Mon Sep 17 00:00:00 2001 From: Ashok Menon Date: Wed, 16 Oct 2024 22:59:47 +0100 Subject: [PATCH 23/33] 
indexer-alt: Make tx_digest the key for kv_transactions --- .../2024-10-15-170316_transactions/up.sql | 2 +- .../src/handlers/kv_transactions.rs | 20 ++++++++++--------- .../src/models/transactions.rs | 2 +- crates/sui-indexer-alt/src/schema.rs | 4 ++-- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/crates/sui-indexer-alt/migrations/2024-10-15-170316_transactions/up.sql b/crates/sui-indexer-alt/migrations/2024-10-15-170316_transactions/up.sql index 394da91909d72..cb0cdd5d68b01 100644 --- a/crates/sui-indexer-alt/migrations/2024-10-15-170316_transactions/up.sql +++ b/crates/sui-indexer-alt/migrations/2024-10-15-170316_transactions/up.sql @@ -1,6 +1,6 @@ CREATE TABLE IF NOT EXISTS kv_transactions ( - tx_sequence_number BIGINT PRIMARY KEY, + tx_digest BYTEA PRIMARY KEY, cp_sequence_number BIGINT NOT NULL, timestamp_ms BIGINT NOT NULL, -- BCS serialized TransactionData diff --git a/crates/sui-indexer-alt/src/handlers/kv_transactions.rs b/crates/sui-indexer-alt/src/handlers/kv_transactions.rs index 55371f50fac5a..7c8241724e5e9 100644 --- a/crates/sui-indexer-alt/src/handlers/kv_transactions.rs +++ b/crates/sui-indexer-alt/src/handlers/kv_transactions.rs @@ -30,26 +30,28 @@ impl Handler for KvTransactions { .. } = checkpoint.as_ref(); - let mut values = Vec::with_capacity(transactions.len()); - let first_tx = checkpoint_summary.network_total_transactions as usize - transactions.len(); + let cp_sequence_number = checkpoint_summary.sequence_number as i64; + let mut values = Vec::with_capacity(transactions.len()); for (i, tx) in transactions.iter().enumerate() { - let tx_sequence_number = (first_tx + i) as i64; + let tx_digest = tx.transaction.digest(); let transaction = &tx.transaction.data().intent_message().value; + let effects = &tx.effects; let events: Vec<_> = tx.events.iter().flat_map(|e| e.data.iter()).collect(); values.push(StoredTransaction { - tx_sequence_number: (first_tx + i) as i64, - cp_sequence_number: checkpoint_summary.sequence_number as i64, + tx_digest: tx_digest.inner().into(), + cp_sequence_number, timestamp_ms: checkpoint_summary.timestamp_ms as i64, - raw_transaction: bcs::to_bytes(transaction) - .with_context(|| format!("Serializing transaction {tx_sequence_number}"))?, + raw_transaction: bcs::to_bytes(transaction).with_context(|| { + format!("Serializing transaction {tx_digest} (cp {cp_sequence_number}, tx {i})") + })?, raw_effects: bcs::to_bytes(effects).with_context(|| { - format!("Serializing effects for transaction {tx_sequence_number}") + format!("Serializing effects for transaction {tx_digest} (cp {cp_sequence_number}, tx {i})") })?, events: bcs::to_bytes(&events).with_context(|| { - format!("Serializing events for transaction {tx_sequence_number}") + format!("Serializing events for transaction {tx_digest} (cp {cp_sequence_number}, tx {i})") })?, }); } diff --git a/crates/sui-indexer-alt/src/models/transactions.rs b/crates/sui-indexer-alt/src/models/transactions.rs index 4c505fb812d5e..57a3222bf4448 100644 --- a/crates/sui-indexer-alt/src/models/transactions.rs +++ b/crates/sui-indexer-alt/src/models/transactions.rs @@ -24,7 +24,7 @@ pub enum BalanceChange { #[derive(Insertable, Debug, Clone)] #[diesel(table_name = kv_transactions)] pub struct StoredTransaction { - pub tx_sequence_number: i64, + pub tx_digest: Vec, pub cp_sequence_number: i64, pub timestamp_ms: i64, pub raw_transaction: Vec, diff --git a/crates/sui-indexer-alt/src/schema.rs b/crates/sui-indexer-alt/src/schema.rs index 8404c4b3041fa..68f84c764848e 100644 --- 
a/crates/sui-indexer-alt/src/schema.rs +++ b/crates/sui-indexer-alt/src/schema.rs @@ -19,8 +19,8 @@ diesel::table! { } diesel::table! { - kv_transactions (tx_sequence_number) { - tx_sequence_number -> Int8, + kv_transactions (tx_digest) { + tx_digest -> Bytea, cp_sequence_number -> Int8, timestamp_ms -> Int8, raw_transaction -> Bytea, From fa62a00af5e7ecf1c733eaac746a001f9adf484a Mon Sep 17 00:00:00 2001 From: Ashok Menon Date: Wed, 16 Oct 2024 23:40:45 +0100 Subject: [PATCH 24/33] refactor(indexer-alt): Introduce Indexer abstraction ## Description Introduce an `Indexer` type to reduce the boilerplate of introducing a new pipeline, and gracefully shutting everything down. This also prepares the codebase for watermarks support: The initial checkpoint to start ingestion at can be calculated by taking the MIN checkpoint in the watermark table across all pipelines. ## Test plan Unit tests: ``` sui$ cargo nextest run -p sui-indexer-alt ``` And run the indexer locally. --- crates/sui-indexer-alt/src/args.rs | 16 +---- crates/sui-indexer-alt/src/lib.rs | 98 ++++++++++++++++++++++++++++++ crates/sui-indexer-alt/src/main.rs | 92 ++++------------------------ 3 files changed, 111 insertions(+), 95 deletions(-) diff --git a/crates/sui-indexer-alt/src/args.rs b/crates/sui-indexer-alt/src/args.rs index adbf028c9b57c..f094f377e437f 100644 --- a/crates/sui-indexer-alt/src/args.rs +++ b/crates/sui-indexer-alt/src/args.rs @@ -1,22 +1,10 @@ // Copyright (c) Mysten Labs, Inc. // SPDX-License-Identifier: Apache-2.0 -use std::net::SocketAddr; - -use crate::{db::DbConfig, handlers::CommitterConfig, ingestion::IngestionConfig}; +use crate::IndexerConfig; #[derive(clap::Parser, Debug, Clone)] pub struct Args { #[command(flatten)] - pub ingestion: IngestionConfig, - - #[command(flatten)] - pub db: DbConfig, - - #[command(flatten)] - pub committer: CommitterConfig, - - /// Address to serve Prometheus Metrics from. - #[arg(long, default_value = "0.0.0.0:9184")] - pub metrics_address: SocketAddr, + pub indexer_config: IndexerConfig, } diff --git a/crates/sui-indexer-alt/src/lib.rs b/crates/sui-indexer-alt/src/lib.rs index cabf14b6ce038..0993421bcb19f 100644 --- a/crates/sui-indexer-alt/src/lib.rs +++ b/crates/sui-indexer-alt/src/lib.rs @@ -1,6 +1,17 @@ // Copyright (c) Mysten Labs, Inc. // SPDX-License-Identifier: Apache-2.0 +use std::{net::SocketAddr, sync::Arc}; + +use anyhow::{Context, Result}; +use db::{Db, DbConfig}; +use handlers::{pipeline, CommitterConfig, Handler}; +use ingestion::{IngestionConfig, IngestionService}; +use metrics::{IndexerMetrics, MetricsService}; +use task::graceful_shutdown; +use tokio::task::JoinHandle; +use tokio_util::sync::CancellationToken; + pub mod args; pub mod db; pub mod handlers; @@ -9,3 +20,90 @@ pub mod metrics; pub mod models; pub mod schema; pub mod task; + +pub struct Indexer { + db: Db, + metrics: Arc, + metrics_service: MetricsService, + ingestion_service: IngestionService, + committer_config: CommitterConfig, + cancel: CancellationToken, + handles: Vec>, +} + +#[derive(clap::Args, Debug, Clone)] +pub struct IndexerConfig { + #[command(flatten)] + pub ingestion_config: IngestionConfig, + + #[command(flatten)] + pub db_config: DbConfig, + + #[command(flatten)] + pub committer_config: CommitterConfig, + + /// Address to serve Prometheus Metrics from. 
+ #[arg(long, default_value = "0.0.0.0:9184")] + pub metrics_address: SocketAddr, +} + +impl Indexer { + pub async fn new(config: IndexerConfig, cancel: CancellationToken) -> Result { + let IndexerConfig { + ingestion_config, + db_config, + committer_config, + metrics_address, + } = config; + + let db = Db::new(db_config) + .await + .context("Failed to connect to database")?; + + let (metrics, metrics_service) = + MetricsService::new(metrics_address, db.clone(), cancel.clone())?; + let ingestion_service = + IngestionService::new(ingestion_config, metrics.clone(), cancel.clone())?; + + Ok(Self { + db, + metrics, + metrics_service, + ingestion_service, + committer_config, + cancel, + handles: vec![], + }) + } + + pub fn pipeline(&mut self) { + let (handler, committer) = pipeline::( + self.db.clone(), + self.ingestion_service.subscribe(), + self.committer_config.clone(), + self.metrics.clone(), + self.cancel.clone(), + ); + + self.handles.push(handler); + self.handles.push(committer); + } + + pub async fn run(mut self) -> Result> { + self.handles.push( + self.metrics_service + .run() + .await + .context("Failed to start metrics service")?, + ); + + self.handles.push( + self.ingestion_service + .run() + .await + .context("Failed to start ingestion service")?, + ); + + Ok(tokio::spawn(graceful_shutdown(self.handles, self.cancel))) + } +} diff --git a/crates/sui-indexer-alt/src/main.rs b/crates/sui-indexer-alt/src/main.rs index 87b6e341a2eda..e55b23f03e81b 100644 --- a/crates/sui-indexer-alt/src/main.rs +++ b/crates/sui-indexer-alt/src/main.rs @@ -5,14 +5,11 @@ use anyhow::{Context, Result}; use clap::Parser; use sui_indexer_alt::{ args::Args, - db::Db, handlers::{ kv_checkpoints::KvCheckpoints, kv_objects::KvObjects, kv_transactions::KvTransactions, - pipeline, tx_affected_objects::TxAffectedObjects, tx_balance_changes::TxBalanceChanges, + tx_affected_objects::TxAffectedObjects, tx_balance_changes::TxBalanceChanges, }, - ingestion::IngestionService, - metrics::MetricsService, - task::graceful_shutdown, + Indexer, }; use tokio_util::sync::CancellationToken; @@ -27,85 +24,18 @@ async fn main() -> Result<()> { let cancel = CancellationToken::new(); - let db = Db::new(args.db) - .await - .context("Failed to connect to database")?; + let mut indexer = Indexer::new(args.indexer_config, cancel.clone()).await?; - let (metrics, metrics_service) = - MetricsService::new(args.metrics_address, db.clone(), cancel.clone())?; - let mut ingestion_service = - IngestionService::new(args.ingestion, metrics.clone(), cancel.clone())?; + indexer.pipeline::(); + indexer.pipeline::(); + indexer.pipeline::(); + indexer.pipeline::(); + indexer.pipeline::(); - let h_metrics = metrics_service - .run() - .await - .context("Failed to start metrics service")?; + let h_indexer = indexer.run().await.context("Failed to start indexer")?; - let (h_cp_handler, h_cp_committer) = pipeline::( - db.clone(), - ingestion_service.subscribe(), - args.committer.clone(), - metrics.clone(), - cancel.clone(), - ); - - let (h_obj_handler, h_obj_committer) = pipeline::( - db.clone(), - ingestion_service.subscribe(), - args.committer.clone(), - metrics.clone(), - cancel.clone(), - ); - - let (h_tx_handler, h_tx_committer) = pipeline::( - db.clone(), - ingestion_service.subscribe(), - args.committer.clone(), - metrics.clone(), - cancel.clone(), - ); - - let (h_tx_objs_handler, h_tx_objs_committer) = pipeline::( - db.clone(), - ingestion_service.subscribe(), - args.committer.clone(), - metrics.clone(), - cancel.clone(), - ); - - let 
(h_tx_bal_handler, h_tx_bal_committer) = pipeline::( - db.clone(), - ingestion_service.subscribe(), - args.committer.clone(), - metrics.clone(), - cancel.clone(), - ); - - let h_ingestion = ingestion_service - .run() - .await - .context("Failed to start ingestion service")?; - - // Once we receive a Ctrl-C or one of the services panics or is cancelled, notify all services - // to shutdown, and wait for them to finish. - graceful_shutdown( - [ - h_cp_handler, - h_cp_committer, - h_obj_handler, - h_obj_committer, - h_tx_handler, - h_tx_committer, - h_tx_objs_handler, - h_tx_objs_committer, - h_tx_bal_handler, - h_tx_bal_committer, - h_metrics, - h_ingestion, - ], - cancel, - ) - .await; + cancel.cancelled().await; + let _ = h_indexer.await; Ok(()) } From e54e47fc953ca51bfc226f601c1f37f30997c0ad Mon Sep 17 00:00:00 2001 From: Ashok Menon Date: Wed, 16 Oct 2024 23:47:09 +0100 Subject: [PATCH 25/33] chore(indexer-alt): Fix typos --- crates/sui-indexer-alt/src/handlers/mod.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/sui-indexer-alt/src/handlers/mod.rs b/crates/sui-indexer-alt/src/handlers/mod.rs index ced26a6c63a7d..5fa3446924b82 100644 --- a/crates/sui-indexer-alt/src/handlers/mod.rs +++ b/crates/sui-indexer-alt/src/handlers/mod.rs @@ -209,7 +209,7 @@ fn handler( }) } -/// The commiter task is responsible for gathering rows to write to the database. It is a single +/// The committer task is responsible for gathering rows to write to the database. It is a single /// work loop that gathers checkpoint-wise row information, and periodically writes them out to the /// database. /// @@ -227,7 +227,7 @@ fn handler( /// - Number of pending rows. If this exceeds `H::BATCH_SIZE`, the committer will attempt to write /// out at most `H::CHUNK_SIZE` worth of rows to the DB. /// -/// If a write fails, the commiter will save the batch it meant to write and try to write them +/// If a write fails, the committer will save the batch it meant to write and try to write them /// again at the next opportunity, potentially adding more rows to the batch if more have arrived /// in the interim. /// @@ -252,7 +252,7 @@ fn committer( cool.set_missed_tick_behavior(MissedTickBehavior::Delay); // Buffer to gather the next batch to write. This may be non-empty at the top of a tick of - // the commiter's loop if the previous attempt at a write failed. Attempt is incremented + // the committer's loop if the previous attempt at a write failed. Attempt is incremented // every time a batch write fails, and is reset when it succeeds. let mut attempt = 0; let mut batch = vec![]; @@ -271,7 +271,7 @@ fn committer( biased; _ = cancel.cancelled() => { - info!(pipline = H::NAME, "Shutdown received, stopping committer"); + info!(pipeline = H::NAME, "Shutdown received, stopping committer"); break; } From 85107660edce67a0f286929b5e743f4e65cec33f Mon Sep 17 00:00:00 2001 From: Ashok Menon Date: Thu, 17 Oct 2024 15:15:11 +0100 Subject: [PATCH 26/33] indexer-alt: committer watermarks ## Description Introduce the watermarks table, and update the committer to update it, taking into account out-of-order writes to the underlying tables. This change also allows the indexer to pick up where it left off by consulting the watermarks table for the high watermarks it had previously written. It also introduces the ability to configure the range of checkpoints being indexed (set an upper and lowerbound). 
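As a rough illustration of the out-of-order handling (a sketch, not the exact code in this change), the committer can fold pre-committed checkpoints into its watermark along these lines; `advance` and the bare `u64` checkpoint numbers are simplified stand-ins for the real `CommitterWatermark`, which also carries epoch and transaction bounds:

```
use std::collections::BTreeSet;

/// Advance `watermark` (highest checkpoint known to be fully written) over
/// `precommitted` (checkpoints whose rows are written but not yet reflected
/// in the watermark). Returns true if the watermark moved.
fn advance(watermark: &mut u64, precommitted: &mut BTreeSet<u64>) -> bool {
    let mut updated = false;
    while let Some(&next) = precommitted.first() {
        if next <= *watermark {
            // Already covered, e.g. re-written during a backfill.
            precommitted.pop_first();
        } else if next == *watermark + 1 {
            // Contiguous with the watermark, safe to fold in.
            *watermark = next;
            precommitted.pop_first();
            updated = true;
        } else {
            // There is a hole; wait for the missing checkpoint.
            break;
        }
    }
    updated
}

fn main() {
    let mut watermark = 10;
    let mut precommitted: BTreeSet<u64> = [13, 11, 12, 15].into_iter().collect();
    assert!(advance(&mut watermark, &mut precommitted));
    assert_eq!(watermark, 13); // 15 is stranded behind the hole at 14.
}
```

Only a contiguous run starting at the current watermark is folded in; anything beyond a gap stays pre-committed until the missing checkpoint is written.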
The metrics introduced by this change can be used to track per-pipeline checkpoint rate, epoch rate and transaction rate. Some TODOs have also been added for various tests and experiments that should be run based on this work. ## Test plan A lot of manual testing. Some TODOs have been left for more extensive testing once there is more of a test set-up (depends on tempdb and auto-migration work). --- .../2024-10-16-225607_watermarks/down.sql | 1 + .../2024-10-16-225607_watermarks/up.sql | 34 ++ crates/sui-indexer-alt/src/handlers/mod.rs | 313 ++++++++++++++---- crates/sui-indexer-alt/src/ingestion/mod.rs | 36 +- crates/sui-indexer-alt/src/lib.rs | 79 ++++- crates/sui-indexer-alt/src/main.rs | 10 +- crates/sui-indexer-alt/src/metrics.rs | 83 ++++- crates/sui-indexer-alt/src/models/mod.rs | 1 + .../sui-indexer-alt/src/models/watermarks.rs | 136 ++++++++ crates/sui-indexer-alt/src/schema.rs | 14 + 10 files changed, 622 insertions(+), 85 deletions(-) create mode 100644 crates/sui-indexer-alt/migrations/2024-10-16-225607_watermarks/down.sql create mode 100644 crates/sui-indexer-alt/migrations/2024-10-16-225607_watermarks/up.sql create mode 100644 crates/sui-indexer-alt/src/models/watermarks.rs diff --git a/crates/sui-indexer-alt/migrations/2024-10-16-225607_watermarks/down.sql b/crates/sui-indexer-alt/migrations/2024-10-16-225607_watermarks/down.sql new file mode 100644 index 0000000000000..e9de336153f62 --- /dev/null +++ b/crates/sui-indexer-alt/migrations/2024-10-16-225607_watermarks/down.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS watermarks; diff --git a/crates/sui-indexer-alt/migrations/2024-10-16-225607_watermarks/up.sql b/crates/sui-indexer-alt/migrations/2024-10-16-225607_watermarks/up.sql new file mode 100644 index 0000000000000..73bdc70055246 --- /dev/null +++ b/crates/sui-indexer-alt/migrations/2024-10-16-225607_watermarks/up.sql @@ -0,0 +1,34 @@ +CREATE TABLE IF NOT EXISTS watermarks +( + -- The pipeline governed by this watermark, i.e `epochs`, `checkpoints`, + -- `transactions`. + pipeline TEXT PRIMARY KEY, + -- Inclusive upper epoch bound for this entity's data. Committer updates + -- this field. Pruner uses this to determine if pruning is necessary based + -- on the retention policy. + epoch_hi_inclusive BIGINT NOT NULL, + -- Inclusive upper checkpoint bound for this entity's data. Committer + -- updates this field. All data of this entity in the checkpoint must be + -- persisted before advancing this watermark. The committer refers to this + -- on disaster recovery to resume writing. + checkpoint_hi_inclusive BIGINT NOT NULL, + -- Exclusive upper transaction sequence number bound for this entity's + -- data. Committer updates this field. + tx_hi BIGINT NOT NULL, + -- Inclusive lower epoch bound for this entity's data. Pruner updates this + -- field when the epoch range exceeds the retention policy. + epoch_lo BIGINT NOT NULL, + -- Inclusive low watermark that the pruner advances. Corresponds to the + -- epoch id, checkpoint sequence number, or tx sequence number depending on + -- the entity. Data before this watermark is considered pruned by a reader. + -- The underlying data may still exist in the db instance. + reader_lo BIGINT NOT NULL, + -- Updated using the database's current timestamp when the pruner sees that + -- some data needs to be dropped. The pruner uses this column to determine + -- whether to prune or wait long enough that all in-flight reads complete + -- or timeout before it acts on an updated watermark. 
+ timestamp_ms BIGINT NOT NULL, + -- Column used by the pruner to track its true progress. Data below this + -- watermark can be immediately pruned. + pruner_hi BIGINT NOT NULL +); diff --git a/crates/sui-indexer-alt/src/handlers/mod.rs b/crates/sui-indexer-alt/src/handlers/mod.rs index 5fa3446924b82..b963bba35665a 100644 --- a/crates/sui-indexer-alt/src/handlers/mod.rs +++ b/crates/sui-indexer-alt/src/handlers/mod.rs @@ -1,7 +1,11 @@ // Copyright (c) Mysten Labs, Inc. // SPDX-License-Identifier: Apache-2.0 -use std::{collections::BTreeMap, sync::Arc, time::Duration}; +use std::{ + collections::{BTreeMap, BTreeSet}, + sync::Arc, + time::Duration, +}; use futures::TryStreamExt; use mysten_metrics::spawn_monitored_task; @@ -18,6 +22,7 @@ use tracing::{debug, error, info, warn}; use crate::{ db::{self, Db}, metrics::IndexerMetrics, + models::watermarks::{CommitterWatermark, Ordering}, }; pub mod kv_checkpoints; @@ -36,6 +41,10 @@ const COOLDOWN_INTERVAL: Duration = Duration::from_millis(20); /// The committer will wait at least this long between attempts to commit a failed batch. const RETRY_INTERVAL: Duration = Duration::from_millis(100); +/// Tracing message for the watermark update will be logged at info level at least this many +/// checkpoints. +const LOUD_WATERMARK_UPDATE_INTERVAL: i64 = 5 * 10; + /// Handlers implement the logic for a given indexing pipeline: How to process checkpoint data into /// rows for their table, and how to write those rows to the database. /// @@ -92,28 +101,52 @@ pub struct CommitterConfig { /// A batch of processed values associated with a single checkpoint. This is an internal type used /// to communicate between the handler and the committer parts of the pipeline. struct Indexed { - sequence_number: u64, + /// Epoch this data is from + epoch: u64, + /// Checkpoint this data is from + cp_sequence_number: u64, + /// Max (exclusive) transaction sequence number in this batch + tx_hi: u64, + /// Values to be inserted into the database from this checkpoint values: Vec, } -/// Start a new indexing pipeline served by the handler, `H`. Each pipeline consists of a handler -/// task which takes checkpoint data and breaks it down into rows, ready for insertion, and a -/// committer which writes those rows out to the database. +impl Indexed { + /// Split apart the information in this indexed checkpoint into its watermark and the values to + /// add to the database. + fn into_batch(self) -> (CommitterWatermark<'static>, Vec) { + let watermark = CommitterWatermark { + pipeline: H::NAME.into(), + epoch_hi_inclusive: self.epoch as i64, + checkpoint_hi_inclusive: self.cp_sequence_number as i64, + tx_hi: self.tx_hi as i64, + }; + + (watermark, self.values) + } +} + +/// Start a new indexing pipeline served by the handler, `H`. Starting strictly after the +/// `watermark` (or from the beginning if no watermark was provided). +/// +/// Each pipeline consists of a handler task which takes checkpoint data and breaks it down into +/// rows, ready for insertion, and a committer which writes those rows out to the database. /// /// Checkpoint data is fed into the pipeline through the `handler_rx` channel, and an internal /// channel is created to communicate checkpoint-wise data to the committer. The pipeline can be /// shutdown using its `cancel` token. 
pub fn pipeline( + watermark: Option>, + config: CommitterConfig, db: Db, handler_rx: mpsc::Receiver>, - config: CommitterConfig, metrics: Arc, cancel: CancellationToken, ) -> (JoinHandle<()>, JoinHandle<()>) { let (handler_tx, committer_rx) = mpsc::channel(H::FANOUT + COMMITTER_BUFFER); let handler = handler::(handler_rx, handler_tx, metrics.clone(), cancel.clone()); - let committer = committer::(db, committer_rx, config, metrics, cancel); + let committer = committer::(watermark, config, committer_rx, db, metrics, cancel); (handler, committer) } @@ -185,8 +218,14 @@ fn handler( .with_label_values(&[H::NAME]) .inc_by(values.len() as u64); + let epoch = checkpoint.checkpoint_summary.epoch; + let cp_sequence_number = checkpoint.checkpoint_summary.sequence_number; + let tx_hi = checkpoint.checkpoint_summary.network_total_transactions; + tx.send(Indexed { - sequence_number: checkpoint.checkpoint_summary.sequence_number, + epoch, + cp_sequence_number, + tx_hi, values, }) .await @@ -234,9 +273,10 @@ fn handler( /// This task will shutdown if canceled via the `cancel` token, or if the channel it receives data /// on has been closed by the handler for some reason. fn committer( - db: Db, - mut rx: mpsc::Receiver>, + watermark: Option>, config: CommitterConfig, + mut rx: mpsc::Receiver>, + db: Db, metrics: Arc, cancel: CancellationToken, ) -> JoinHandle<()> { @@ -255,15 +295,39 @@ fn committer( // the committer's loop if the previous attempt at a write failed. Attempt is incremented // every time a batch write fails, and is reset when it succeeds. let mut attempt = 0; - let mut batch = vec![]; + let mut batch_values = vec![]; + let mut batch_watermarks = vec![]; // Data for checkpoints that haven't been written yet. Note that `pending_rows` includes // rows in `batch`. - let mut pending: BTreeMap> = BTreeMap::new(); + let mut pending: BTreeMap> = BTreeMap::new(); let mut pending_rows = 0; - // TODO: Track the watermark (keep track of out-of-order commits, and update the watermark - // table when we accumulate a run of contiguous commits starting from the current watermark). + // Track the high watermark for the pipeline. The pipeline confirms that it has written all + // checkpoint data up from the watermark it is initialised with up to and including this + // watermark. + // + // To correctly update the watermark, the committer tracks the watermark it last tried to + // write and the watermarks for any checkpoints that have been written since then + // ("pre-committed"). After each batch is written, the committer will try to progress the + // watermark as much as possible without going over any holes in the sequence of + // checkpoints. + // + // NOTE: When no watermark is provided, it is assumed that the pipeline is starting from + // scratch, but we still initialize it as if it is at (after) the genesis checkpoint. This + // means we never write a watermark for the genesis checkpoint, but would wait for another + // checkpoint to be written out before updating the watermark, which is fine in practice + // and simplifies the logic of tracking watermarks. + let mut precommitted: BTreeSet> = BTreeSet::new(); + let mut watermark = + watermark.unwrap_or_else(|| CommitterWatermark::initial(H::NAME.into())); + + // The committer will periodically output a log message at a higher log level to + // demonstrate that the pipeline is making progress. 
+ let mut next_loud_watermark_update = + watermark.checkpoint_hi_inclusive + LOUD_WATERMARK_UPDATE_INTERVAL; + + info!(pipeline = H::NAME, ?watermark, "Starting committer"); loop { tokio::select! { @@ -288,19 +352,22 @@ fn committer( .with_label_values(&[H::NAME]) .start_timer(); - while batch.len() < H::CHUNK_SIZE { + while batch_values.len() < H::CHUNK_SIZE { let Some(mut entry) = pending.first_entry() else { break; }; - let values = entry.get_mut(); - if batch.len() + values.len() > H::CHUNK_SIZE { - let mut for_batch = values.split_off(H::CHUNK_SIZE - batch.len()); + let indexed = entry.get_mut(); + let values = &mut indexed.values; + if batch_values.len() + values.len() > H::CHUNK_SIZE { + let mut for_batch = values.split_off(H::CHUNK_SIZE - batch_values.len()); std::mem::swap(values, &mut for_batch); - batch.extend(for_batch); + batch_values.extend(for_batch); break; } else { - batch.extend(entry.remove()); + let (watermark, values) = entry.remove().into_batch(); + batch_values.extend(values); + batch_watermarks.push(watermark); } } @@ -308,7 +375,7 @@ fn committer( debug!( pipeline = H::NAME, elapsed_ms = elapsed * 1000.0, - rows = batch.len(), + rows = batch_values.len(), pending = pending_rows, "Gathered batch", ); @@ -344,43 +411,22 @@ fn committer( metrics .committer_batch_size .with_label_values(&[H::NAME]) - .observe(batch.len() as f64); + .observe(batch_values.len() as f64); let guard = metrics .committer_commit_latency .with_label_values(&[H::NAME]) .start_timer(); - match H::commit(&batch, &mut conn).await { - Ok(affected) => { - let elapsed = guard.stop_and_record(); - - metrics - .total_committer_rows_committed - .with_label_values(&[H::NAME]) - .inc_by(batch.len() as u64); - - metrics - .total_committer_rows_affected - .with_label_values(&[H::NAME]) - .inc_by(affected as u64); - - debug!( - pipeline = H::NAME, - elapsed_ms = elapsed * 1000.0, - attempt, - affected, - committed = batch.len(), - pending = pending_rows, - "Wrote batch", - ); - - pending_rows -= batch.len(); - attempt = 0; + // TODO (optimization): Parallelize batch writes? + // + // Previous findings suggest that having about 5 parallel committers per table + // yields the best performance. Is that still true for this new architecture? + // If we go down this route, we should consider factoring that work out into a + // separate task that also handles the watermark. 
- batch.clear(); - cool.reset(); - } + let affected = match H::commit(&batch_values, &mut conn).await { + Ok(affected) => affected, Err(e) => { let elapsed = guard.stop_and_record(); @@ -389,16 +435,171 @@ fn committer( pipeline = H::NAME, elapsed_ms = elapsed * 1000.0, attempt, - committed = batch.len(), + committed = batch_values.len(), pending = pending_rows, "Error writing batch: {e}", ); cool.reset_after(RETRY_INTERVAL); attempt += 1; + continue; + } + }; + + let elapsed = guard.stop_and_record(); + + metrics + .total_committer_rows_committed + .with_label_values(&[H::NAME]) + .inc_by(batch_values.len() as u64); + + metrics + .total_committer_rows_affected + .with_label_values(&[H::NAME]) + .inc_by(affected as u64); + + debug!( + pipeline = H::NAME, + elapsed_ms = elapsed * 1000.0, + attempt, + affected, + committed = batch_values.len(), + pending = pending_rows, + "Wrote batch", + ); + + pending_rows -= batch_values.len(); + attempt = 0; + + precommitted.extend(batch_watermarks.drain(..)); + batch_values.clear(); + + // Check if the pipeline's watermark needs to be updated + let guard = metrics + .watermark_gather_latency + .with_label_values(&[H::NAME]) + .start_timer(); + + let mut watermark_needs_update = false; + while let Some(pending) = precommitted.first() { + match watermark.next_cmp(pending) { + Ordering::Future => break, + Ordering::Past => { + // Out of order watermarks can be encountered when a pipeline is + // starting up, because ingestion must start at the lowest + // checkpoint across all pipelines, or because of a backfill, where + // the initial checkpoint has been overridden. + // + // Track how many we see to make sure it doesn't grow without + // bound. + metrics + .total_watermarks_out_of_order + .with_label_values(&[H::NAME]) + .inc(); + + precommitted.pop_first().unwrap(); + } + + Ordering::Next => { + // SAFETY: `precommitted` is known to be non-empty because of the loop + // condition. + watermark = precommitted.pop_first().unwrap(); + watermark_needs_update = true; + } + } + } + + let elapsed = guard.stop_and_record(); + + metrics + .watermark_epoch + .with_label_values(&[H::NAME]) + .set(watermark.epoch_hi_inclusive); + + metrics + .watermark_checkpoint + .with_label_values(&[H::NAME]) + .set(watermark.checkpoint_hi_inclusive); + + metrics + .watermark_transaction + .with_label_values(&[H::NAME]) + .set(watermark.tx_hi); + + debug!( + pipeline = H::NAME, + elapsed_ms = elapsed * 1000.0, + watermark = watermark.checkpoint_hi_inclusive, + pending = precommitted.len(), + "Gathered watermarks", + ); + + if watermark_needs_update { + let guard = metrics + .watermark_commit_latency + .with_label_values(&[H::NAME]) + .start_timer(); + + match watermark.update(&mut conn).await { + // If there's an issue updating the watermark, log it but keep going, + // it's OK for the watermark to lag from a correctness perspective. 
+ Err(e) => { + let elapsed = guard.stop_and_record(); + error!( + pipeline = H::NAME, + elapsed_ms = elapsed * 1000.0, + ?watermark, + "Error updating watermark: {e}", + ); + } + + Ok(updated) => { + let elapsed = guard.stop_and_record(); + + if updated { + metrics + .watermark_epoch_in_db + .with_label_values(&[H::NAME]) + .set(watermark.epoch_hi_inclusive); + + metrics + .watermark_checkpoint_in_db + .with_label_values(&[H::NAME]) + .set(watermark.checkpoint_hi_inclusive); + + metrics + .watermark_transaction_in_db + .with_label_values(&[H::NAME]) + .set(watermark.tx_hi); + } + + if watermark.checkpoint_hi_inclusive > next_loud_watermark_update { + next_loud_watermark_update += LOUD_WATERMARK_UPDATE_INTERVAL; + info!( + pipeline = H::NAME, + elapsed_ms = elapsed * 1000.0, + updated, + epoch = watermark.epoch_hi_inclusive, + checkpoint = watermark.checkpoint_hi_inclusive, + transaction = watermark.tx_hi, + "Watermark", + ); + } else { + debug!( + pipeline = H::NAME, + elapsed_ms = elapsed * 1000.0, + updated, + epoch = watermark.epoch_hi_inclusive, + checkpoint = watermark.checkpoint_hi_inclusive, + transaction = watermark.tx_hi, + "Watermark", + ); + } + } } } + cool.reset(); } // If there are enough pending rows, and we've expended the cooldown, reset the @@ -409,14 +610,14 @@ fn committer( } indexed = rx.recv(), if pending_rows < H::MAX_PENDING_SIZE => { - if let Some(Indexed { sequence_number, values }) = indexed { + if let Some(indexed) = indexed { metrics .total_committer_rows_received .with_label_values(&[H::NAME]) - .inc_by(values.len() as u64); + .inc_by(indexed.values.len() as u64); - pending_rows += values.len(); - pending.insert(sequence_number, values); + pending_rows += indexed.values.len(); + pending.insert(indexed.cp_sequence_number, indexed); } else { info!(pipeline = H::NAME, "Handler closed channel, stopping committer"); break; diff --git a/crates/sui-indexer-alt/src/ingestion/mod.rs b/crates/sui-indexer-alt/src/ingestion/mod.rs index b53cfa164d870..9c920a805415a 100644 --- a/crates/sui-indexer-alt/src/ingestion/mod.rs +++ b/crates/sui-indexer-alt/src/ingestion/mod.rs @@ -33,10 +33,6 @@ pub struct IngestionConfig { #[arg(long)] remote_store_url: Url, - /// First checkpoint to start ingestion from. - #[arg(long, default_value_t = 0)] - start_checkpoint: u64, - /// Maximum size of checkpoint backlog across all workers downstream of the ingestion service. #[arg(long, default_value_t = 5000)] buffer_size: usize, @@ -81,9 +77,9 @@ impl IngestionService { /// Start the ingestion service as a background task, consuming it in the process. /// - /// Checkpoints are fetched concurrently starting with the configured `start_checkpoint`, and - /// pushed to subscribers' channels (potentially out-of-order). Subscribers can communicate - /// with the ingestion service via their channels in the following ways: + /// Checkpoints are fetched concurrently from the `checkpoints` iterator, and pushed to + /// subscribers' channels (potentially out-of-order). Subscribers can communicate with the + /// ingestion service via their channels in the following ways: /// /// - If a subscriber is lagging (not receiving checkpoints fast enough), it will eventually /// provide back-pressure to the ingestion service, which will stop fetching new checkpoints. @@ -93,7 +89,11 @@ impl IngestionService { /// If ingestion reaches the leading edge of the network, it will encounter checkpoints that do /// not exist yet. 
These will be retried repeatedly on a fixed `retry_interval` until they /// become available. - pub async fn run(self) -> Result> { + pub async fn run(self, checkpoints: I) -> Result> + where + I: IntoIterator + Send + Sync + 'static, + I::IntoIter: Send + Sync + 'static, + { let IngestionService { config, client, @@ -107,10 +107,9 @@ impl IngestionService { } Ok(spawn_monitored_task!(async move { - let start = config.start_checkpoint; - info!(start, "Starting ingestion service"); + info!("Starting ingestion service"); - match stream::iter(start..) + match stream::iter(checkpoints) .map(Ok) .try_for_each_concurrent(/* limit */ config.concurrency, |cp| { let client = client.clone(); @@ -199,7 +198,6 @@ mod tests { IngestionService::new( IngestionConfig { remote_store_url: Url::parse(&uri).unwrap(), - start_checkpoint: 0, buffer_size, concurrency, retry_interval: Duration::from_millis(200), @@ -245,7 +243,7 @@ mod tests { let cancel = CancellationToken::new(); let ingestion_service = test_ingestion(server.uri(), 1, 1, cancel.clone()).await; - let err = ingestion_service.run().await.unwrap_err(); + let err = ingestion_service.run(0..).await.unwrap_err(); assert!(matches!(err, Error::NoSubscribers)); } @@ -267,7 +265,7 @@ mod tests { let rx = ingestion_service.subscribe(); let subscriber_handle = test_subscriber(usize::MAX, rx, cancel.clone()).await; - let ingestion_handle = ingestion_service.run().await.unwrap(); + let ingestion_handle = ingestion_service.run(0..).await.unwrap(); cancel.cancel(); subscriber_handle.await.unwrap(); @@ -292,7 +290,7 @@ mod tests { let rx = ingestion_service.subscribe(); let subscriber_handle = test_subscriber(1, rx, cancel.clone()).await; - let ingestion_handle = ingestion_service.run().await.unwrap(); + let ingestion_handle = ingestion_service.run(0..).await.unwrap(); cancel.cancelled().await; subscriber_handle.await.unwrap(); @@ -313,7 +311,7 @@ mod tests { let rx = ingestion_service.subscribe(); let subscriber_handle = test_subscriber(usize::MAX, rx, cancel.clone()).await; - let ingestion_handle = ingestion_service.run().await.unwrap(); + let ingestion_handle = ingestion_service.run(0..).await.unwrap(); cancel.cancelled().await; subscriber_handle.await.unwrap(); @@ -344,7 +342,7 @@ mod tests { let rx = ingestion_service.subscribe(); let subscriber_handle = test_subscriber(5, rx, cancel.clone()).await; - let ingestion_handle = ingestion_service.run().await.unwrap(); + let ingestion_handle = ingestion_service.run(0..).await.unwrap(); cancel.cancelled().await; let seqs = subscriber_handle.await.unwrap(); @@ -376,7 +374,7 @@ mod tests { let rx = ingestion_service.subscribe(); let subscriber_handle = test_subscriber(5, rx, cancel.clone()).await; - let ingestion_handle = ingestion_service.run().await.unwrap(); + let ingestion_handle = ingestion_service.run(0..).await.unwrap(); cancel.cancelled().await; let seqs = subscriber_handle.await.unwrap(); @@ -414,7 +412,7 @@ mod tests { let rx = ingestion_service.subscribe(); let subscriber_handle = test_subscriber(5, rx, cancel.clone()).await; - let ingestion_handle = ingestion_service.run().await.unwrap(); + let ingestion_handle = ingestion_service.run(0..).await.unwrap(); // At this point, the service will have been able to pass 3 checkpoints to the non-lagging // subscriber, while the laggard's buffer fills up. 
Now the laggard will pull two diff --git a/crates/sui-indexer-alt/src/lib.rs b/crates/sui-indexer-alt/src/lib.rs index 0993421bcb19f..0842fd6426609 100644 --- a/crates/sui-indexer-alt/src/lib.rs +++ b/crates/sui-indexer-alt/src/lib.rs @@ -8,9 +8,11 @@ use db::{Db, DbConfig}; use handlers::{pipeline, CommitterConfig, Handler}; use ingestion::{IngestionConfig, IngestionService}; use metrics::{IndexerMetrics, MetricsService}; +use models::watermarks::CommitterWatermark; use task::graceful_shutdown; use tokio::task::JoinHandle; use tokio_util::sync::CancellationToken; +use tracing::info; pub mod args; pub mod db; @@ -22,12 +24,36 @@ pub mod schema; pub mod task; pub struct Indexer { + /// Connection pool to the database. db: Db, + + /// Prometheus Metrics. metrics: Arc, + + /// Service for serving Prometheis metrics. metrics_service: MetricsService, + + /// Service for downloading and disseminating checkpoint data. ingestion_service: IngestionService, + + /// Parameters for the committers of each pipeline. committer_config: CommitterConfig, + + /// Optional override of the checkpoint lowerbound. + first_checkpoint: Option, + + /// Optional override of the checkpoint upperbound. + last_checkpoint: Option, + + /// Cancellation token shared among all continuous tasks in the service. cancel: CancellationToken, + + /// The checkpoint lowerbound derived from watermarks of pipelines added to the indexer. When + /// the indexer runs, it will start from this point, unless this has been overridden by + /// [Self::first_checkpoint]. + first_checkpoint_from_watermark: u64, + + /// The handles for every task spawned by this indexer, used to manage graceful shutdown. handles: Vec>, } @@ -42,6 +68,17 @@ pub struct IndexerConfig { #[command(flatten)] pub committer_config: CommitterConfig, + /// Override for the checkpoint to start ingestion from -- useful for backfills. By default, + /// ingestion will start just after the lowest checkpoint watermark across all active + /// pipelines. + #[arg(long)] + first_checkpoint: Option, + + /// Override for the checkpoint to end ingestion at (inclusive) -- useful for backfills. By + /// default, ingestion will not stop, and will continue to poll for new checkpoints. + #[arg(long)] + last_checkpoint: Option, + /// Address to serve Prometheus Metrics from. #[arg(long, default_value = "0.0.0.0:9184")] pub metrics_address: SocketAddr, @@ -53,6 +90,8 @@ impl Indexer { ingestion_config, db_config, committer_config, + first_checkpoint, + last_checkpoint, metrics_address, } = config; @@ -71,24 +110,48 @@ impl Indexer { metrics_service, ingestion_service, committer_config, + first_checkpoint, + last_checkpoint, cancel, + first_checkpoint_from_watermark: u64::MAX, handles: vec![], }) } - pub fn pipeline(&mut self) { + /// Adds a new pipeline to this indexer and starts it up. Although their tasks have started, + /// they will be idle until the ingestion service starts, and serves it checkpoint data. + pub async fn pipeline(&mut self) -> Result<()> { + let mut conn = self.db.connect().await.context("Failed DB connection")?; + + let watermark = CommitterWatermark::get(&mut conn, H::NAME) + .await + .with_context(|| format!("Failed to get watermark for {}", H::NAME))?; + + // TODO(amnn): Test this (depends on supporting migrations and tempdb). 
+ self.first_checkpoint_from_watermark = watermark + .as_ref() + .map_or(0, |w| w.checkpoint_hi_inclusive as u64 + 1) + .min(self.first_checkpoint_from_watermark); + let (handler, committer) = pipeline::( + watermark, + self.committer_config.clone(), self.db.clone(), self.ingestion_service.subscribe(), - self.committer_config.clone(), self.metrics.clone(), self.cancel.clone(), ); self.handles.push(handler); self.handles.push(committer); + + Ok(()) } + /// Start ingesting checkpoints. Ingestion either starts from the configured + /// `first_checkpoint`, or it is calculated based on the watermarks of all active pipelines. + /// Ingestion will stop after consuming the configured `last_checkpoint`, if one is provided, + /// or will continue until it tracks the tip of the network. pub async fn run(mut self) -> Result> { self.handles.push( self.metrics_service @@ -97,9 +160,19 @@ impl Indexer { .context("Failed to start metrics service")?, ); + // If an override has been provided, start ingestion from there, otherwise start ingestion + // from just after the lowest committer watermark across all enabled pipelines. + let first_checkpoint = self + .first_checkpoint + .unwrap_or(self.first_checkpoint_from_watermark); + + let last_checkpoint = self.last_checkpoint.unwrap_or(u64::MAX); + + info!(first_checkpoint, last_checkpoint = ?self.last_checkpoint, "Ingestion range"); + self.handles.push( self.ingestion_service - .run() + .run(first_checkpoint..=last_checkpoint) .await .context("Failed to start ingestion service")?, ); diff --git a/crates/sui-indexer-alt/src/main.rs b/crates/sui-indexer-alt/src/main.rs index e55b23f03e81b..12aa84ba5f7bc 100644 --- a/crates/sui-indexer-alt/src/main.rs +++ b/crates/sui-indexer-alt/src/main.rs @@ -26,11 +26,11 @@ async fn main() -> Result<()> { let mut indexer = Indexer::new(args.indexer_config, cancel.clone()).await?; - indexer.pipeline::(); - indexer.pipeline::(); - indexer.pipeline::(); - indexer.pipeline::(); - indexer.pipeline::(); + indexer.pipeline::().await?; + indexer.pipeline::().await?; + indexer.pipeline::().await?; + indexer.pipeline::().await?; + indexer.pipeline::().await?; let h_indexer = indexer.run().await.context("Failed to start indexer")?; diff --git a/crates/sui-indexer-alt/src/metrics.rs b/crates/sui-indexer-alt/src/metrics.rs index 8d9e590854609..03f37e965dd31 100644 --- a/crates/sui-indexer-alt/src/metrics.rs +++ b/crates/sui-indexer-alt/src/metrics.rs @@ -10,8 +10,9 @@ use prometheus::{ core::{Collector, Desc}, proto::{Counter, Gauge, LabelPair, Metric, MetricFamily, MetricType, Summary}, register_histogram_vec_with_registry, register_histogram_with_registry, - register_int_counter_vec_with_registry, register_int_counter_with_registry, Histogram, - HistogramVec, IntCounter, IntCounterVec, Registry, + register_int_counter_vec_with_registry, register_int_counter_with_registry, + register_int_gauge_vec_with_registry, Histogram, HistogramVec, IntCounter, IntCounterVec, + IntGaugeVec, Registry, }; use tokio::{net::TcpListener, task::JoinHandle}; use tokio_util::sync::CancellationToken; @@ -79,6 +80,19 @@ pub struct IndexerMetrics { pub committer_gather_latency: HistogramVec, pub committer_commit_latency: HistogramVec, pub committer_batch_size: HistogramVec, + + pub watermark_gather_latency: HistogramVec, + pub watermark_commit_latency: HistogramVec, + + pub total_watermarks_out_of_order: IntCounterVec, + + pub watermark_epoch: IntGaugeVec, + pub watermark_checkpoint: IntGaugeVec, + pub watermark_transaction: IntGaugeVec, + + pub 
watermark_epoch_in_db: IntGaugeVec, + pub watermark_checkpoint_in_db: IntGaugeVec, + pub watermark_transaction_in_db: IntGaugeVec, } /// Collects information about the database connection pool. @@ -280,6 +294,71 @@ impl IndexerMetrics { registry, ) .unwrap(), + watermark_gather_latency: register_histogram_vec_with_registry!( + "indexer_watermark_gather_latency", + "Time taken to calculate the new high watermark after a write by this committer", + &["pipeline"], + PROCESSING_LATENCY_SEC_BUCKETS.to_vec(), + registry, + ) + .unwrap(), + watermark_commit_latency: register_histogram_vec_with_registry!( + "indexer_watermark_commit_latency", + "Time taken to write the new high watermark to the database by this committer", + &["pipeline"], + DB_UPDATE_LATENCY_SEC_BUCKETS.to_vec(), + registry, + ) + .unwrap(), + total_watermarks_out_of_order: register_int_counter_vec_with_registry!( + "indexer_watermark_out_of_order", + "Number of times this committer encountered a batch for a checkpoint before its watermark", + &["pipeline"], + registry, + ) + .unwrap(), + watermark_epoch: register_int_gauge_vec_with_registry!( + "indexer_watermark_epoch", + "Current epoch high watermark for this committer", + &["pipeline"], + registry, + ) + .unwrap(), + watermark_checkpoint: register_int_gauge_vec_with_registry!( + "indexer_watermark_checkpoint", + "Current checkpoint high watermark for this committer", + &["pipeline"], + registry, + ) + .unwrap(), + watermark_transaction: register_int_gauge_vec_with_registry!( + "indexer_watermark_transaction", + "Current transaction high watermark for this committer", + &["pipeline"], + registry, + ) + .unwrap(), + watermark_epoch_in_db: register_int_gauge_vec_with_registry!( + "indexer_watermark_epoch_in_db", + "Last epoch high watermark this committer wrote to the DB", + &["pipeline"], + registry, + ) + .unwrap(), + watermark_checkpoint_in_db: register_int_gauge_vec_with_registry!( + "indexer_watermark_checkpoint_in_db", + "Last checkpoint high watermark this committer wrote to the DB", + &["pipeline"], + registry, + ) + .unwrap(), + watermark_transaction_in_db: register_int_gauge_vec_with_registry!( + "indexer_watermark_transaction_in_db", + "Last transaction high watermark this committer wrote to the DB", + &["pipeline"], + registry, + ) + .unwrap(), } } diff --git a/crates/sui-indexer-alt/src/models/mod.rs b/crates/sui-indexer-alt/src/models/mod.rs index a5f137fbac1b7..dc5a42d38f71e 100644 --- a/crates/sui-indexer-alt/src/models/mod.rs +++ b/crates/sui-indexer-alt/src/models/mod.rs @@ -4,3 +4,4 @@ pub mod checkpoints; pub mod objects; pub mod transactions; +pub mod watermarks; diff --git a/crates/sui-indexer-alt/src/models/watermarks.rs b/crates/sui-indexer-alt/src/models/watermarks.rs new file mode 100644 index 0000000000000..5c4f9624862e1 --- /dev/null +++ b/crates/sui-indexer-alt/src/models/watermarks.rs @@ -0,0 +1,136 @@ +// Copyright (c) Mysten Labs, Inc. +// SPDX-License-Identifier: Apache-2.0 + +use std::{borrow::Cow, cmp}; + +use crate::{db::Connection, schema::watermarks}; +use diesel::prelude::*; +use diesel_async::RunQueryDsl; + +#[derive(Insertable, Debug, Clone)] +#[diesel(table_name = watermarks)] +pub struct StoredWatermark { + pub pipeline: String, + pub epoch_hi_inclusive: i64, + pub checkpoint_hi_inclusive: i64, + pub tx_hi: i64, + pub epoch_lo: i64, + pub reader_lo: i64, + pub timestamp_ms: i64, + pub pruner_hi: i64, +} + +/// Fields that the committer is responsible for setting. 
+#[derive(AsChangeset, Selectable, Queryable, Debug, Clone)] +#[diesel(table_name = watermarks)] +pub struct CommitterWatermark<'p> { + pub pipeline: Cow<'p, str>, + pub epoch_hi_inclusive: i64, + pub checkpoint_hi_inclusive: i64, + pub tx_hi: i64, +} + +/// Outcomes from extending one watermark with another. +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub enum Ordering { + /// The watermark was in the future, so could not be added. + Future, + + /// The added watermark was in the past, so the current watermark didn't change. + Past, + + /// The added watermark was the successor to the current watermark, so was used in the update. + Next, +} + +impl CommitterWatermark<'static> { + /// Get the current high watermark for the pipeline. + pub async fn get( + conn: &mut Connection<'_>, + pipeline: &'static str, + ) -> QueryResult> { + watermarks::table + .select(CommitterWatermark::as_select()) + .filter(watermarks::pipeline.eq(pipeline)) + .first(conn) + .await + .optional() + } +} + +impl<'p> CommitterWatermark<'p> { + /// A new watermark with the given pipeline name indicating zero progress. + pub fn initial(pipeline: Cow<'p, str>) -> Self { + CommitterWatermark { + pipeline, + epoch_hi_inclusive: 0, + checkpoint_hi_inclusive: 0, + tx_hi: 0, + } + } + + /// Upsert the high watermark as long as it raises the watermark stored in the database. + /// Returns a boolean indicating whether the watermark was actually updated or not. + /// + /// TODO(amnn): Test this (depends on supporting migrations and tempdb). + pub async fn update(&self, conn: &mut Connection<'_>) -> QueryResult { + use diesel::query_dsl::methods::FilterDsl; + Ok(diesel::insert_into(watermarks::table) + .values(StoredWatermark::from(self.clone())) + .on_conflict(watermarks::pipeline) + .do_update() + .set(self) + .filter(watermarks::checkpoint_hi_inclusive.lt(self.checkpoint_hi_inclusive)) + .execute(conn) + .await? + > 0) + } + + /// Compare `other` with the immediate successor of this watermark. + pub fn next_cmp(&self, other: &CommitterWatermark<'_>) -> Ordering { + let next = self.checkpoint_hi_inclusive + 1; + match other.checkpoint_hi_inclusive.cmp(&next) { + cmp::Ordering::Equal => Ordering::Next, + cmp::Ordering::Less => Ordering::Past, + cmp::Ordering::Greater => Ordering::Future, + } + } +} + +impl<'p> From> for StoredWatermark { + fn from(watermark: CommitterWatermark<'p>) -> Self { + StoredWatermark { + pipeline: watermark.pipeline.into_owned(), + epoch_hi_inclusive: watermark.epoch_hi_inclusive, + checkpoint_hi_inclusive: watermark.checkpoint_hi_inclusive, + tx_hi: watermark.tx_hi, + epoch_lo: 0, + reader_lo: 0, + timestamp_ms: 0, + pruner_hi: 0, + } + } +} + +// Ordering for watermarks is driven solely by their checkpoints. + +impl PartialEq for CommitterWatermark<'_> { + fn eq(&self, other: &Self) -> bool { + self.checkpoint_hi_inclusive == other.checkpoint_hi_inclusive + } +} + +impl Eq for CommitterWatermark<'_> {} + +impl Ord for CommitterWatermark<'_> { + fn cmp(&self, other: &Self) -> cmp::Ordering { + self.checkpoint_hi_inclusive + .cmp(&other.checkpoint_hi_inclusive) + } +} + +impl PartialOrd for CommitterWatermark<'_> { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} diff --git a/crates/sui-indexer-alt/src/schema.rs b/crates/sui-indexer-alt/src/schema.rs index 68f84c764848e..5184de6505618 100644 --- a/crates/sui-indexer-alt/src/schema.rs +++ b/crates/sui-indexer-alt/src/schema.rs @@ -44,10 +44,24 @@ diesel::table! { } } +diesel::table! 
{ + watermarks (pipeline) { + pipeline -> Text, + epoch_hi_inclusive -> Int8, + checkpoint_hi_inclusive -> Int8, + tx_hi -> Int8, + epoch_lo -> Int8, + reader_lo -> Int8, + timestamp_ms -> Int8, + pruner_hi -> Int8, + } +} + diesel::allow_tables_to_appear_in_same_query!( kv_checkpoints, kv_objects, kv_transactions, tx_affected_objects, tx_balance_changes, + watermarks, ); From baa0f2e624fc5adf7efd4132f83b88148263f633 Mon Sep 17 00:00:00 2001 From: Ashok Menon Date: Thu, 17 Oct 2024 15:24:11 +0100 Subject: [PATCH 27/33] chore(indexer-alt): Mention patch file in `diesel.toml` ## Description This makes it so that calls to `diesel setup` or `diesel migration run` will not forget to add the copyright notice to the schema file. ## Test plan Run the following: ``` sui-indexer-alt$ diesel migration run --database-url="postgres://postgres:postgrespw@localhost:5432/sui_indexer_alt" --migration-dir migrations ``` Notice that the schema doesn't change (the copyright notice is not remove) --- crates/sui-indexer-alt/diesel.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/sui-indexer-alt/diesel.toml b/crates/sui-indexer-alt/diesel.toml index 0679916a26a25..054029ff39a8a 100644 --- a/crates/sui-indexer-alt/diesel.toml +++ b/crates/sui-indexer-alt/diesel.toml @@ -1,5 +1,6 @@ [print_schema] file = "src/schema.rs" +patch_file = "schema.patch" [migrations_directory] dir = "migrations" From d27a7270fa7e94344638e8dfc13255aba799c886 Mon Sep 17 00:00:00 2001 From: Ashok Menon Date: Thu, 17 Oct 2024 15:25:47 +0100 Subject: [PATCH 28/33] easy(indexer-alt): Process checkpoint message mentions sequence number ## Description Add the sequence number for the checkpoint that the pipeline processed to the tracing message, so it's easier to follow along. ## Test plan Run the indexer locally. --- crates/sui-indexer-alt/src/handlers/mod.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/crates/sui-indexer-alt/src/handlers/mod.rs b/crates/sui-indexer-alt/src/handlers/mod.rs index b963bba35665a..e055db7bc8238 100644 --- a/crates/sui-indexer-alt/src/handlers/mod.rs +++ b/crates/sui-indexer-alt/src/handlers/mod.rs @@ -202,8 +202,13 @@ fn handler( let values = H::handle(&checkpoint)?; let elapsed = guard.stop_and_record(); + let epoch = checkpoint.checkpoint_summary.epoch; + let cp_sequence_number = checkpoint.checkpoint_summary.sequence_number; + let tx_hi = checkpoint.checkpoint_summary.network_total_transactions; + debug!( pipeline = H::NAME, + checkpoint = cp_sequence_number, elapsed_ms = elapsed * 1000.0, "Processed checkpoint", ); @@ -218,10 +223,6 @@ fn handler( .with_label_values(&[H::NAME]) .inc_by(values.len() as u64); - let epoch = checkpoint.checkpoint_summary.epoch; - let cp_sequence_number = checkpoint.checkpoint_summary.sequence_number; - let tx_hi = checkpoint.checkpoint_summary.network_total_transactions; - tx.send(Indexed { epoch, cp_sequence_number, From 555bd241e5aa7c4373b402600f98be87b660b3b2 Mon Sep 17 00:00:00 2001 From: Ashok Menon Date: Thu, 17 Oct 2024 16:27:26 +0100 Subject: [PATCH 29/33] indexer-alt: support checkpoint upperbound for graceful shutdown ## Description Previously, the service would just hang after having wound down all pipelines during a graceful shutdown. This is because the graceful shutdown process was left waiting for an explicit interrupt signal to unblock the whole shutdown process. This change relaxes that constraint: If the service is already shutting down, then there is no need to wait for the explicit Ctrl-C. 
This makes it feasible to run the indexer for specific ranges of checkpoints. Note that we don't want to call the global cancellation token once ingestion is done, because that might stop a downstream task before it has acted on the downloaded checkpoints. Instead, the ingestion service communicates completion by closing its channel, which propagates through the handlers and committers. ## Test plan ``` RUST_LOG="info,sui_indexer_alt=debug" cargo run -p sui-indexer-alt --release -- \ --database-url "postgres://postgres:postgrespw@localhost:5432/sui_indexer_alt" \ --remote-store-url https://checkpoints.mainnet.sui.io \ --last-checkpoint 1000 ``` --- crates/sui-indexer-alt/src/handlers/mod.rs | 6 ++++- crates/sui-indexer-alt/src/ingestion/mod.rs | 5 +++- crates/sui-indexer-alt/src/lib.rs | 26 +++++++++++++++------ crates/sui-indexer-alt/src/task.rs | 16 +++++++++---- 4 files changed, 39 insertions(+), 14 deletions(-) diff --git a/crates/sui-indexer-alt/src/handlers/mod.rs b/crates/sui-indexer-alt/src/handlers/mod.rs index e055db7bc8238..cfadb6990dbd0 100644 --- a/crates/sui-indexer-alt/src/handlers/mod.rs +++ b/crates/sui-indexer-alt/src/handlers/mod.rs @@ -237,7 +237,11 @@ fn handler( }) .await { - Ok(()) | Err(Break::Cancel) => { + Ok(()) => { + info!(pipeline = H::NAME, "Checkpoints done, stopping handler"); + } + + Err(Break::Cancel) => { info!(pipeline = H::NAME, "Shutdown received, stopping handler"); } diff --git a/crates/sui-indexer-alt/src/ingestion/mod.rs b/crates/sui-indexer-alt/src/ingestion/mod.rs index 9c920a805415a..385b2026a5acc 100644 --- a/crates/sui-indexer-alt/src/ingestion/mod.rs +++ b/crates/sui-indexer-alt/src/ingestion/mod.rs @@ -162,7 +162,10 @@ impl IngestionService { }) .await { - Ok(()) => {} + Ok(()) => { + info!("Checkpoints done, stopping ingestion service"); + // drop(subscribers); + } Err(Error::Cancelled) => { info!("Shutdown received, stopping ingestion service"); diff --git a/crates/sui-indexer-alt/src/lib.rs b/crates/sui-indexer-alt/src/lib.rs index 0842fd6426609..21efc59ee38dd 100644 --- a/crates/sui-indexer-alt/src/lib.rs +++ b/crates/sui-indexer-alt/src/lib.rs @@ -153,12 +153,11 @@ impl Indexer { /// Ingestion will stop after consuming the configured `last_checkpoint`, if one is provided, /// or will continue until it tracks the tip of the network. pub async fn run(mut self) -> Result> { - self.handles.push( - self.metrics_service - .run() - .await - .context("Failed to start metrics service")?, - ); + let metrics_handle = self + .metrics_service + .run() + .await + .context("Failed to start metrics service")?; // If an override has been provided, start ingestion from there, otherwise start ingestion // from just after the lowest committer watermark across all enabled pipelines. @@ -177,6 +176,19 @@ impl Indexer { .context("Failed to start ingestion service")?, ); - Ok(tokio::spawn(graceful_shutdown(self.handles, self.cancel))) + let cancel = self.cancel.clone(); + Ok(tokio::spawn(async move { + // Wait for the ingestion service and all its related tasks to wind down gracefully: + // If ingestion has been configured to only handle a specific range of checkpoints, we + // want to make sure that tasks are allowed to run to completion before shutting them + // down. + graceful_shutdown(self.handles, self.cancel).await; + + info!("Indexing pipeline gracefully shut down"); + + // Pick off any stragglers (in this case, just the metrics service). 
+ cancel.cancel(); + metrics_handle.await.unwrap(); + })) } } diff --git a/crates/sui-indexer-alt/src/task.rs b/crates/sui-indexer-alt/src/task.rs index c4fd802d8afb1..d027541a78310 100644 --- a/crates/sui-indexer-alt/src/task.rs +++ b/crates/sui-indexer-alt/src/task.rs @@ -4,7 +4,7 @@ use std::iter; use futures::future::{self, Either}; -use tokio::{signal, task::JoinHandle}; +use tokio::{signal, sync::oneshot, task::JoinHandle}; use tokio_util::sync::CancellationToken; /// Manages cleanly exiting the process, either because one of its constituent services has stopped @@ -13,10 +13,16 @@ pub async fn graceful_shutdown( services: impl IntoIterator>, cancel: CancellationToken, ) { + // If the service is naturalling winding down, we don't need to wait for an interrupt signal. + // This channel is used to short-circuit the await in that case. + let (cancel_ctrl_c_tx, cancel_ctrl_c_rx) = oneshot::channel(); + let interrupt = async { - signal::ctrl_c() - .await - .expect("Failed to listen for Ctrl-C signal"); + tokio::select! { + _ = cancel_ctrl_c_rx => {} + _ = cancel.cancelled() => {} + _ = signal::ctrl_c() => cancel.cancel(), + } Ok(()) }; @@ -30,7 +36,7 @@ pub async fn graceful_shutdown( // Wait for the first service to finish, or for an interrupt signal. let (_, _, rest) = future::select_all(futures).await; - cancel.cancel(); + let _ = cancel_ctrl_c_tx.send(()); // Wait for the remaining services to finish. let _ = future::join_all(rest).await; From 4441e40eb13130cd3239e406bf0fa658cec00907 Mon Sep 17 00:00:00 2001 From: Ashok Menon Date: Thu, 17 Oct 2024 16:46:27 +0100 Subject: [PATCH 30/33] indexer-alt: support only running some pipelines ## Description This makes it easier to perform selective backfills. ## Test plan ``` sui$ RUST_LOG="info" cargo run -p sui-indexer-alt --release -- \ --database-url "postgres://postgres:postgrespw@localhost:5432/sui_indexer_alt" \ --remote-store-url https://checkpoints.mainnet.sui.io \ --last-checkpoint 1000 \ --pipeline kv_transactions \ --pipeline tx_affected_objects ``` Note that for now, the system does clip off the end of the checkpoint stream, rather than waiting for all workers to finish, and that still needs to be fixed. --- crates/sui-indexer-alt/src/handlers/mod.rs | 1 + crates/sui-indexer-alt/src/lib.rs | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/crates/sui-indexer-alt/src/handlers/mod.rs b/crates/sui-indexer-alt/src/handlers/mod.rs index cfadb6990dbd0..7631317b647f6 100644 --- a/crates/sui-indexer-alt/src/handlers/mod.rs +++ b/crates/sui-indexer-alt/src/handlers/mod.rs @@ -178,6 +178,7 @@ fn handler( } spawn_monitored_task!(async move { + info!(pipeline = H::NAME, "Starting handler"); match ReceiverStream::new(rx) .map(Ok) .try_for_each_concurrent(H::FANOUT, |checkpoint| { diff --git a/crates/sui-indexer-alt/src/lib.rs b/crates/sui-indexer-alt/src/lib.rs index 21efc59ee38dd..8f26442314400 100644 --- a/crates/sui-indexer-alt/src/lib.rs +++ b/crates/sui-indexer-alt/src/lib.rs @@ -1,7 +1,7 @@ // Copyright (c) Mysten Labs, Inc. // SPDX-License-Identifier: Apache-2.0 -use std::{net::SocketAddr, sync::Arc}; +use std::{collections::BTreeSet, net::SocketAddr, sync::Arc}; use anyhow::{Context, Result}; use db::{Db, DbConfig}; @@ -45,6 +45,9 @@ pub struct Indexer { /// Optional override of the checkpoint upperbound. last_checkpoint: Option, + /// Optional override of enabled pipelines. + enabled_pipelines: BTreeSet, + /// Cancellation token shared among all continuous tasks in the service. 
cancel: CancellationToken, @@ -79,6 +82,11 @@ pub struct IndexerConfig { #[arg(long)] last_checkpoint: Option, + /// Only run the following pipelines -- useful for backfills. If not provided, all pipelines + /// will be run. + #[arg(long, action = clap::ArgAction::Append)] + pipeline: Vec, + /// Address to serve Prometheus Metrics from. #[arg(long, default_value = "0.0.0.0:9184")] pub metrics_address: SocketAddr, @@ -92,6 +100,7 @@ impl Indexer { committer_config, first_checkpoint, last_checkpoint, + pipeline, metrics_address, } = config; @@ -112,6 +121,7 @@ impl Indexer { committer_config, first_checkpoint, last_checkpoint, + enabled_pipelines: pipeline.into_iter().collect(), cancel, first_checkpoint_from_watermark: u64::MAX, handles: vec![], @@ -121,6 +131,11 @@ impl Indexer { /// Adds a new pipeline to this indexer and starts it up. Although their tasks have started, /// they will be idle until the ingestion service starts, and serves it checkpoint data. pub async fn pipeline(&mut self) -> Result<()> { + if !self.enabled_pipelines.is_empty() && !self.enabled_pipelines.contains(H::NAME) { + info!("Skipping pipeline {}", H::NAME); + return Ok(()); + } + let mut conn = self.db.connect().await.context("Failed DB connection")?; let watermark = CommitterWatermark::get(&mut conn, H::NAME) From a998c2a5987a58a978f082fd871b80d9e541e828 Mon Sep 17 00:00:00 2001 From: Ashok Menon Date: Fri, 18 Oct 2024 00:41:40 +0100 Subject: [PATCH 31/33] fix(indexer-alt): Flush pending rows before shutdown ## Description When running the indexer on a finite range of checkpoints. Make sure commiters' buffers are completely empty before shutting down their task, otherwise we may not fully write out all the intended rows for the range of checkpoints provided (there may be some data left at the bottom of the barrel). ## Test plan Ran the following: ``` cargo run -p sui-indexer-alt --release -- \ --database-url "postgres://postgres:postgrespw@localhost:5432/sui_indexer_alt" \ --remote-store-url https://checkpoints.mainnet.sui.io \ --last-checkpoint 2000 ``` Corroborated that the data that results in the DB at the end: - Stops at the expected checkpoint (not before or after) - Matches counts of rows in the production mainnet DB for the equivalent tables at the same checkpoints. This can/should be made into an automated test, but that requires tempdb and migrations to be implemented (a comment has been added to this effect). --- crates/sui-indexer-alt/src/handlers/mod.rs | 25 +++++++++++----------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/crates/sui-indexer-alt/src/handlers/mod.rs b/crates/sui-indexer-alt/src/handlers/mod.rs index 7631317b647f6..81ab70e5a7042 100644 --- a/crates/sui-indexer-alt/src/handlers/mod.rs +++ b/crates/sui-indexer-alt/src/handlers/mod.rs @@ -605,6 +605,12 @@ fn committer( } } + // TODO (amnn): Test this behaviour (requires tempdb and migrations). 
+ if pending_rows == 0 && rx.is_closed() { + info!(pipeline = H::NAME, "Handler closed channel, pending rows empty, stopping committer"); + break; + } + cool.reset(); } @@ -615,19 +621,14 @@ fn committer( poll.reset_immediately(); } - indexed = rx.recv(), if pending_rows < H::MAX_PENDING_SIZE => { - if let Some(indexed) = indexed { - metrics - .total_committer_rows_received - .with_label_values(&[H::NAME]) - .inc_by(indexed.values.len() as u64); + Some(indexed )= rx.recv(), if pending_rows < H::MAX_PENDING_SIZE => { + metrics + .total_committer_rows_received + .with_label_values(&[H::NAME]) + .inc_by(indexed.values.len() as u64); - pending_rows += indexed.values.len(); - pending.insert(indexed.cp_sequence_number, indexed); - } else { - info!(pipeline = H::NAME, "Handler closed channel, stopping committer"); - break; - } + pending_rows += indexed.values.len(); + pending.insert(indexed.cp_sequence_number, indexed); } } } From 396fdc87471e1a4b7d2d451ab1e59e03407275a5 Mon Sep 17 00:00:00 2001 From: Ashok Menon Date: Fri, 18 Oct 2024 00:46:35 +0100 Subject: [PATCH 32/33] indexer-alt: --skip-watermark ## Description Add a flag to skip writing to the watermark table. This would be useful when an indexer is running in parallel with another indexer and it's just not even productive to try writing to the watermarks table, because it will always be behind. ## Test plan Tested locally. --- crates/sui-indexer-alt/src/handlers/mod.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/crates/sui-indexer-alt/src/handlers/mod.rs b/crates/sui-indexer-alt/src/handlers/mod.rs index 81ab70e5a7042..1ff73588b70cb 100644 --- a/crates/sui-indexer-alt/src/handlers/mod.rs +++ b/crates/sui-indexer-alt/src/handlers/mod.rs @@ -96,6 +96,10 @@ pub struct CommitterConfig { value_parser = |s: &str| s.parse().map(Duration::from_millis), )] commit_interval: Duration, + + /// Avoid writing to the watermark table + #[arg(long)] + skip_watermark: bool, } /// A batch of processed values associated with a single checkpoint. This is an internal type used @@ -540,7 +544,7 @@ fn committer( "Gathered watermarks", ); - if watermark_needs_update { + if !config.skip_watermark && watermark_needs_update { let guard = metrics .watermark_commit_latency .with_label_values(&[H::NAME]) From 9693b7a19eab62ec4fe7876a9d12a577375e4733 Mon Sep 17 00:00:00 2001 From: Ashok Menon Date: Fri, 18 Oct 2024 10:50:55 +0100 Subject: [PATCH 33/33] chore(indexer-alt): note potential experiments --- crates/sui-indexer-alt/src/handlers/mod.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/crates/sui-indexer-alt/src/handlers/mod.rs b/crates/sui-indexer-alt/src/handlers/mod.rs index 1ff73588b70cb..603086f7310f4 100644 --- a/crates/sui-indexer-alt/src/handlers/mod.rs +++ b/crates/sui-indexer-alt/src/handlers/mod.rs @@ -342,6 +342,9 @@ fn committer( loop { tokio::select! { // Break ties in favour of operations that reduce the size of the buffer. + // + // TODO (experiment): Do we need this? It adds some complexity/subtlety to this + // work loop, so if we don't notice a big difference, we should get rid of it. biased; _ = cancel.cancelled() => { @@ -390,7 +393,7 @@ fn committer( "Gathered batch", ); - // TODO (optimization): Switch to COPY FROM, which should offer faster inserts? + // TODO (experiment): Switch to COPY FROM, which should offer faster inserts? // // Note that COPY FROM cannot handle conflicts -- we need a way to gracefully // fail over to `INSERT INTO ... 
ON CONFLICT DO NOTHING` if we encounter a @@ -428,7 +431,7 @@ fn committer( .with_label_values(&[H::NAME]) .start_timer(); - // TODO (optimization): Parallelize batch writes? + // TODO (experiment): Parallelize batch writes? // // Previous findings suggest that having about 5 parallel committers per table // yields the best performance. Is that still true for this new architecture? @@ -621,6 +624,10 @@ fn committer( // If there are enough pending rows, and we've expended the cooldown, reset the // commit polling interval so that on the next iteration of the loop, we will write // out another batch. + // + // TODO (experiment): Do we need this cooldown to deal with contention on the + // connection pool? It's possible that this is just going to eat into our + // throughput. _ = cool.tick(), if pending_rows > H::BATCH_SIZE => { poll.reset_immediately(); }
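
Taken together, the flags introduced in patches 29-32 (`--first-checkpoint`, `--last-checkpoint`, `--pipeline`, and `--skip-watermark`) enable bounded, selective backfills. As a usage sketch only: the checkpoint bounds below are made-up placeholders, and the database and remote store URLs are the ones already used in the test plans above; a backfill run might be invoked roughly as follows:

```
# Hypothetical selective backfill: the checkpoint range is a placeholder,
# substitute real bounds for the tables being backfilled.
RUST_LOG="info" cargo run -p sui-indexer-alt --release -- \
  --database-url "postgres://postgres:postgrespw@localhost:5432/sui_indexer_alt" \
  --remote-store-url https://checkpoints.mainnet.sui.io \
  --first-checkpoint 1000 \
  --last-checkpoint 2000 \
  --pipeline kv_transactions \
  --pipeline tx_affected_objects \
  --skip-watermark
```

Here `--pipeline` is repeated once per table to backfill (as in the patch 30 test plan), and `--skip-watermark` keeps the backfill instance from contending with a live indexer over the watermarks table, per the patch 32 description.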