semiotic-ai · suchapalaver · Nov 6, 2024 · Nov 6, 2024 · Nov 1, 2024
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -22,7 +22,7 @@ fake = "2.10.0"
 futures = "0.3.31"
 hex = "0.4.3"
 http = "1.1.0"
-insta = "1.39.0"
+insta = "1.41.1"
 log = "0.4.20"
 merkle_proof = { git = "https://github.com/semiotic-ai/lighthouse.git", branch = "stable" }
 object_store = { version = "0.11.1", features = ["gcp", "http", "aws"] }
@@ -35,7 +35,6 @@ prost-wkt = "0.6.0"
 prost-wkt-types = "0.6.0"
 prost-wkt-build = "0.6.0"
 rand = "0.8.5"
-rayon = "1.8.0"
 reqwest = { version = "0.12.8", features = ["json"] }
 reth-primitives = { git = "https://github.com/paradigmxyz/reth", version = "1.1.0", tag = "v1.1.0" }
 reth-trie-common = { git = "https://github.com/paradigmxyz/reth", version = "1.1.0", tag = "v1.1.0" }
@@ -50,7 +49,6 @@ tempfile = "3.0"
 thiserror = "1.0.63"
 tokio = "1.39.2"
 tokio-stream = "0.1.16"
-tokio-test = "0.4.3"
 tonic = "0.12.0"
 tonic-build = "0.12.0"
 tracing = "0.1.40"

diff --git a/crates/firehose-client/Cargo.toml b/crates/firehose-client/Cargo.toml
@@ -25,4 +25,3 @@ tracing.workspace = true
 
 [dev-dependencies]
 hex.workspace = true
-insta.workspace = true
diff --git a/crates/firehose-protos/protos/block.proto b/crates/firehose-protos/protos/block.proto
@@ -791,9 +791,9 @@ message GasChange {
     REASON_CONTRACT_CREATION2 = 7;
     // REASON_DELEGATE_CALL is the amount of gas that will be charged for a 'DELEGATECALL' opcode executed by the EVM
     REASON_DELEGATE_CALL = 8;
-    // REASON_EVENT_LOG is the amount of gas that will be charged for a 'LOG<N>' opcode executed by the EVM
+    // REASON_EVENT_LOG is the amount of gas that will be charged for a `LOG<N>` opcode executed by the EVM
     REASON_EVENT_LOG = 9;
-    // REASON_EXT_CODE_COPY is the amount of gas that will be charged for a 'LOG<N>' opcode executed by the EVM
+    // REASON_EXT_CODE_COPY is the amount of gas that will be charged for a `LOG<N>` opcode executed by the EVM
     REASON_EXT_CODE_COPY = 10;
     // REASON_FAILED_EXECUTION is the burning of the remaining gas when the execution failed without a revert
     REASON_FAILED_EXECUTION = 11;

diff --git a/crates/flat-files-decoder/Cargo.toml b/crates/flat-files-decoder/Cargo.toml
@@ -7,32 +7,25 @@ edition = "2021"
 name = "flat_files_decoder"
 path = "src/lib.rs"
 
-[[bin]]
-name = "flat-files-decoder"
-path = "src/main.rs"
-
 [dependencies]
 alloy-primitives.workspace = true
 alloy-consensus.workspace = true
 alloy-eip2930.workspace = true
 bincode.workspace = true
-clap.workspace = true
 firehose-protos = { path = "../firehose-protos" }
 prost.workspace = true
-rand.workspace = true
-rayon.workspace = true
 reth-primitives.workspace = true
 serde = { workspace = true, features = ["derive"] }
 serde_json.workspace = true
 thiserror.workspace = true
-tokio = { workspace = true, features = ["full"] }
 tracing.workspace = true
-tracing-subscriber = { workspace = true, features = ["json", "env-filter"] }
 zstd.workspace = true
 
 [dev-dependencies]
+clap.workspace = true
 criterion.workspace = true
-tokio-test.workspace = true
+rand.workspace = true
+tracing-subscriber = { workspace = true, features = ["json", "env-filter"] }
 
 [[bench]]
 name = "decoder"
@@ -41,7 +34,3 @@ harness = false
 [[bench]]
 name = "stream_blocks"
 harness = false
-
-[build-dependencies]
-prost-build.workspace = true
-prost-wkt-build.workspace = true
diff --git a/crates/flat-files-decoder/README.md b/crates/flat-files-decoder/README.md
@@ -0,0 +1,60 @@
+# Flat Files Decoder
+
+## Running CLI Example
+
+### Commands
+
+The tool provides the following commands for various operations:
+
+- `stream`: Stream data continuously.
+- `decode`: Decode files from input to output.
+- `help`: Print this message or the help of the given subcommand(s).
+
+### Options
+
+You can use the following options with the commands for additional functionalities:
+
+- `-h, --help`: Print help information about specific command and options.
+- `-V, --version`: Print the version information of the tool.
+
+### Usage Examples
+
+Here are some examples of how to use the commands:
+
+1. To stream data continuously from `stdin`:
+
+```terminal
+cargo run -p flat-files-decoder --example cli stream
+```
+
+```terminal
+cat example0017686312.dbin | cargo run -p flat-files-decoder --example cli stream
+```
+
+This will output decoded header records as bytes into `stdout`
+
+1. To check a folder of dbin files:
+
+```terminal
+cargo run -p flat-files-decoder --example cli decode --input ./input_files/ --compression true
+```
+
+So, if using test data from a `test-assets/` folder in the root of the `veemon` repo:
+
+```terminal
+cargo run -p flat-files-decoder --example cli decode --input test-assets/benchmark_files/pre_merge
+```
+
+This will store the block headers as json format in the output folder. 
+By passing `--headers-dir` a folder of assumed valid block headers can be provided to compare
+with the input flat files. Valid headers can be pulled from the [sync committee subprotocol](https://github.com/ethereum/annotated-spec/blob/master/altair/sync-protocol.md) for post-merge data.
+
+## Benchmarking
+
+- Run `cargo bench` in the root directory of the project
+- Benchmark results will be output to the terminal
+- Benchmark time includes reading from disk & writing output to disk
+- Results can be found in `target/criterion/report/index.html`
+
+For proper benchmarking of future improvements, fixes and features please compare baselines.
+Refer to [the end of this section of Criterion documentation](https://bheisler.github.io/criterion.rs/book/user_guide/command_line_options.html) for more information on creating and comparing baselines.
diff --git a/crates/flat-files-decoder/Readme.md b/crates/flat-files-decoder/Readme.md
diff --git a/crates/flat-files-decoder/benches/decoder.rs b/crates/flat-files-decoder/benches/decoder.rs
@@ -1,8 +1,11 @@
 extern crate rand;
 
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
-use flat_files_decoder::{decoder::handle_file, decompression::Decompression};
-use std::fs;
+use flat_files_decoder::read_blocks_from_reader;
+use std::{
+    fs::{self, File},
+    io::BufReader,
+};
 
 const ITERS_PER_FILE: usize = 10;
 
@@ -23,7 +26,10 @@ fn bench(c: &mut Criterion) {
                 }
             }
 
-            b.iter(|| handle_file(black_box(&path), None, None, Decompression::None));
+            b.iter(|| {
+                let reader = BufReader::new(File::open(path.as_os_str()).unwrap());
+                read_blocks_from_reader(black_box(reader), false.into())
+            });
         }
     });
 

diff --git a/crates/flat-files-decoder/benches/stream_blocks.rs b/crates/flat-files-decoder/benches/stream_blocks.rs
@@ -4,7 +4,7 @@ use std::{
 };
 
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
-use flat_files_decoder::{dbin::DbinFile, error::DecoderError};
+use flat_files_decoder::read_block_from_reader;
 use prost::Message;
 
 const ITERS_PER_FILE: usize = 10;
@@ -27,16 +27,16 @@ fn read_decode_check_bench(c: &mut Criterion) {
             }
             let file = File::open(&path).expect("Failed to open file");
             let mut reader = BufReader::new(file);
-            let mut message: Result<Vec<u8>, DecoderError> = Err(DecoderError::InvalidDbinBytes);
+
             loop {
+                let mut message: Result<Vec<u8>, _> = Ok(Vec::new());
+
                 b.iter(|| {
-                    message = black_box(DbinFile::read_message_stream(&mut reader));
+                    message = black_box(read_block_from_reader(&mut reader));
                 });
-                match message {
-                    Ok(_) => continue,
-                    Err(_) => {
-                        break;
-                    }
+
+                if message.is_err() {
+                    break;
                 }
             }
         }
@@ -57,7 +57,7 @@ fn read_decode_check_bench(c: &mut Criterion) {
             let file = File::open(&path).expect("Failed to open file");
             let mut reader = BufReader::new(file);
             loop {
-                let message = match DbinFile::read_message_stream(&mut reader) {
+                let message = match read_block_from_reader(&mut reader) {
                     Ok(message) => message,
                     Err(_) => {
                         break;
@@ -88,7 +88,7 @@ fn read_decode_check_bench(c: &mut Criterion) {
             let file = File::open(&path).expect("Failed to open file");
             let mut reader = BufReader::new(file);
             loop {
-                let message = match DbinFile::read_message_stream(&mut reader) {
+                let message = match read_block_from_reader(&mut reader) {
                     Ok(message) => message,
                     Err(_) => {
                         break;
@@ -121,7 +121,7 @@ fn read_decode_check_bench(c: &mut Criterion) {
             let file = File::open(&path).expect("Failed to open file");
             let mut reader = BufReader::new(file);
             loop {
-                let message = match DbinFile::read_message_stream(&mut reader) {
+                let message = match read_block_from_reader(&mut reader) {
                     Ok(message) => message,
                     Err(_) => {
                         break;
@@ -155,7 +155,7 @@ fn read_decode_check_bench(c: &mut Criterion) {
             let file = File::open(&path).expect("Failed to open file");
             let mut reader = BufReader::new(file);
             loop {
-                let message = match DbinFile::read_message_stream(&mut reader) {
+                let message = match read_block_from_reader(&mut reader) {
                     Ok(message) => message,
                     Err(_) => {
                         break;
Original file line number	Diff line number	Diff line change
Expand Up		@@ -25,4 +25,3 @@ tracing.workspace = true

		[dev-dependencies]
		hex.workspace = true
		insta.workspace = true