diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..6976e989 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,142 @@ +name: 'CI' +on: + pull_request: + push: + branches: + - master + +env: + RUST_BACKTRACE: 1 + CARGO_TERM_COLOR: always + +jobs: + ci-pass: + name: CI is green + runs-on: ubuntu-latest + needs: + - test-linux + - test-windows + - test-macos + - build-extra + - lint + steps: + - run: exit 0 + + test-linux: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - uses: dtolnay/rust-toolchain@stable + with: + components: rustfmt + - name: Run tests + run: | + cargo check --no-default-features --features tokio + cargo check --no-default-features --features tokio,sparse + cargo check --no-default-features --features tokio,sparse,cache + cargo check --no-default-features --features async-std + cargo check --no-default-features --features async-std,sparse + cargo check --no-default-features --features async-std,sparse,cache + cargo test --no-default-features --features js_interop_tests,tokio + cargo test --no-default-features --features js_interop_tests,tokio,sparse + cargo test --no-default-features --features js_interop_tests,tokio,sparse,cache + cargo test --no-default-features --features js_interop_tests,async-std + cargo test --no-default-features --features js_interop_tests,async-std,sparse + cargo test --no-default-features --features js_interop_tests,async-std,sparse,cache + cargo test --benches --no-default-features --features tokio + cargo test --benches --no-default-features --features async-std + + test-windows: + runs-on: windows-latest + + steps: + - uses: actions/checkout@v3 + - uses: dtolnay/rust-toolchain@stable + with: + components: rustfmt + - name: Run tests + run: | + cargo check --no-default-features --features tokio + cargo check --no-default-features --features tokio,sparse + cargo check --no-default-features --features tokio,sparse,cache + cargo check 
--no-default-features --features async-std + cargo check --no-default-features --features async-std,sparse + cargo check --no-default-features --features async-std,sparse,cache + cargo test --no-default-features --features tokio + cargo test --no-default-features --features tokio,sparse + cargo test --no-default-features --features tokio,sparse,cache + cargo test --no-default-features --features async-std + cargo test --no-default-features --features async-std,sparse + cargo test --no-default-features --features async-std,sparse,cache + cargo test --benches --no-default-features --features tokio + cargo test --benches --no-default-features --features async-std + + test-macos: + runs-on: macos-latest + + steps: + - uses: actions/checkout@v3 + - uses: dtolnay/rust-toolchain@stable + with: + components: rustfmt + - name: Run tests + run: | + cargo check --no-default-features --features tokio + cargo check --no-default-features --features tokio,sparse + cargo check --no-default-features --features tokio,sparse,cache + cargo check --no-default-features --features async-std + cargo check --no-default-features --features async-std,sparse + cargo check --no-default-features --features async-std,sparse,cache + cargo test --no-default-features --features js_interop_tests,tokio + cargo test --no-default-features --features js_interop_tests,tokio,sparse + cargo test --no-default-features --features js_interop_tests,tokio,sparse,cache + cargo test --no-default-features --features js_interop_tests,async-std + cargo test --no-default-features --features js_interop_tests,async-std,sparse + cargo test --no-default-features --features js_interop_tests,async-std,sparse,cache + cargo test --benches --no-default-features --features tokio + cargo test --benches --no-default-features --features async-std + + build-extra: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - uses: dtolnay/rust-toolchain@stable + with: + targets: wasm32-unknown-unknown + - name: Build WASM 
+ run: | + cargo build --target=wasm32-unknown-unknown --no-default-features --features tokio + cargo build --target=wasm32-unknown-unknown --no-default-features --features async-std + - name: Build release + run: | + cargo build --release --no-default-features --features tokio + cargo build --release --no-default-features --features tokio,sparse + cargo build --release --no-default-features --features tokio,sparse,cache + cargo build --release --no-default-features --features async-std + cargo build --release --no-default-features --features async-std,sparse + cargo build --release --no-default-features --features async-std,sparse,cache + - name: Run examples + run: | + cargo run --no-default-features --features tokio --example disk + cargo run --no-default-features --features async-std --example disk + cargo run --no-default-features --features tokio --example memory + cargo run --no-default-features --features async-std --example memory + cargo run --no-default-features --features tokio --example replication + cargo run --no-default-features --features async-std --example replication + + lint: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - uses: dtolnay/rust-toolchain@stable + with: + components: rustfmt, clippy + - uses: actions-rs/clippy-check@v1 + with: + token: ${{ secrets.GITHUB_TOKEN }} + - name: Format check + run: | + cargo fmt -- --check diff --git a/.gitignore b/.gitignore index 2e7f0592..292f6504 100644 --- a/.gitignore +++ b/.gitignore @@ -7,7 +7,9 @@ npm-debug.log* .nyc_output target/ Cargo.lock +package-lock.json my-first-dataset/ feed.db/ .vscode +tests/js/work diff --git a/Cargo.toml b/Cargo.toml index e36d9d89..f9e09fd2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,12 +1,15 @@ [package] name = "hypercore" -version = "0.11.1-beta.10" +version = "0.12.0-alpha.9" license = "MIT OR Apache-2.0" description = "Secure, distributed, append-only log" documentation = "https://docs.rs/hypercore" repository = 
"https://github.com/datrs/hypercore" readme = "README.md" -authors = ["Yoshua Wuyts "] +authors = [ + "Yoshua Wuyts ", + "Timo Tiuraniemi " +] keywords = ["dat", "p2p", "stream", "feed", "merkle"] categories = [ "asynchronous", @@ -15,33 +18,61 @@ categories = [ "data-structures", "encoding", ] -edition = "2018" +edition = "2021" [dependencies] -blake2-rfc = "0.2.18" -byteorder = "1.3.4" -ed25519-dalek = "1.0.1" -anyhow = "1.0.26" -flat-tree = "5.0.0" -lazy_static = "1.4.0" -memory-pager = "0.9.0" -merkle-tree-stream = "0.12.0" -pretty-hash = "0.4.1" -rand = "0.7.3" -random-access-disk = "2.0.0" -random-access-memory = "2.0.0" -random-access-storage = "4.0.0" -sha2 = "0.9.2" -sleep-parser = "0.8.0" -sparse-bitfield = "0.11.0" -tree-index = "0.6.0" -bitfield-rle = "0.2.0" -futures = "0.3.4" -async-std = "1.5.0" +blake2 = "0.10" +byteorder = "1" +ed25519-dalek = { version = "2", features = ["rand_core"] } +getrandom = { version = "0.2", features = ["js"] } +thiserror = "1" +tracing = "0.1" +compact-encoding = "1" +flat-tree = "6" +merkle-tree-stream = "0.12" +pretty-hash = "0.4" +rand = "0.8" +random-access-memory = "3" +random-access-storage = "5" +sha2 = "0.10" +futures = "0.3" +crc32fast = "1" +intmap = "2" +moka = { version = "0.12", optional = true, features = ["sync"] } + +[target.'cfg(not(target_arch = "wasm32"))'.dependencies] +random-access-disk = { version = "3", default-features = false } [dev-dependencies] -quickcheck = "0.9.2" +anyhow = "1.0.70" +proptest = "1.1.0" +proptest-derive = "0.2.0" data-encoding = "2.2.0" remove_dir_all = "0.7.0" tempfile = "3.1.0" -async-std = { version = "1.5.0", features = ["attributes"] } +async-std = { version = "1.12.0", features = ["attributes"] } +tokio = { version = "1.27.0", default-features = false, features = ["macros", "rt", "rt-multi-thread"] } +tokio-test = "0.4" +sha2 = "0.10" +criterion = { version = "0.4", features = ["async_std", "async_tokio"] } +test-log = { version = "0.2.11", default-features = false, 
features = ["trace"] } +tracing-subscriber = { version = "0.3.16", features = ["env-filter", "fmt"] } + +[features] +default = ["async-std", "sparse"] +sparse = ["random-access-disk/sparse"] +tokio = ["random-access-disk/tokio"] +async-std = ["random-access-disk/async-std"] +cache = ["moka"] +# Used only in interoperability tests under tests/js-interop which use the javascript version of hypercore +# to verify that this crate works. To run them, use: +# cargo test --features js_interop_tests +js_interop_tests = [] + +[[bench]] +name = "memory" +harness = false + +[[bench]] +name = "disk" +harness = false diff --git a/README.md b/README.md index fc0de20d..a95afaeb 100644 --- a/README.md +++ b/README.md @@ -1,53 +1,99 @@ -# hypercore +# Hypercore [![crates.io version][1]][2] [![build status][3]][4] [![downloads][5]][6] [![docs.rs docs][7]][8] -WIP. Secure, distributed, append-only log structure. Adapted from -[mafintosh/hypercore](https://github.com/mafintosh/hypercore). +Hypercore is a secure, distributed append-only log. This crate is a limited Rust +port of the original Javascript +[holepunchto/hypercore](https://github.com/holepunchto/hypercore). The goal is to +maintain binary compatibility with the LTS version with regards to disk storage. + +See [hypercore-protocol-rs](https://github.com/datrs/hypercore-protocol-rs) for the +corresponding wire protocol implementation. - [Documentation][8] - [Crates.io][2] -**NOTE**: The master branch currently only works with the old hypercore version 9. -For ongoing work to support the latest version 10 of hypercore [see the v10 branch](https://github.com/datrs/hypercore/tree/v10). 
+## Features + +- [x] Create [in-memory](https://github.com/datrs/random-access-memory) and [disk](https://github.com/datrs/random-access-disk) hypercores +- [x] Append to hypercore either a single entry or a batch of entries +- [x] Get entries from hypercore +- [x] Clear range from hypercore, with optional support for sparse files +- [x] Support basic replication by creating proofs in a source hypercore and verifying and applying them to a destination hypercore +- [x] Support `tokio` or `async-std` runtimes +- [x] Support WASM for in-memory storage +- [x] Test Javascript interoperability for supported features +- [x] Add optional read cache +- [ ] Support the new [manifest](https://github.com/holepunchto/hypercore/blob/main/lib/manifest.js) in the wire protocol to remain compatible with upcoming v11 +- [ ] Finalize documentation and release v1.0.0 ## Usage + ```rust -let mut feed = hypercore::open("./feed.db").await?; +// Create an in-memory hypercore using a builder +let mut hypercore = HypercoreBuilder::new(Storage::new_memory().await.unwrap()) + .build() + .await + .unwrap(); -feed.append(b"hello").await?; -feed.append(b"world").await?; +// Append entries to the log +hypercore.append(b"Hello, ").await.unwrap(); +hypercore.append(b"world!").await.unwrap(); -assert_eq!(feed.get(0).await?, Some(b"hello".to_vec())); -assert_eq!(feed.get(1).await?, Some(b"world".to_vec())); +// Read entries from the log +assert_eq!(hypercore.get(0).await.unwrap().unwrap(), b"Hello, "); +assert_eq!(hypercore.get(1).await.unwrap().unwrap(), b"world!"); +``` + +Find more examples in the [examples](./examples) folder, and/or run: + +```bash +cargo run --example memory +cargo run --example disk +cargo run --example replication ``` ## Installation -```sh -$ cargo add hypercore + +```bash +cargo add hypercore ``` ## Safety -This crate uses ``#![deny(unsafe_code)]`` to ensure everything is implemented in + +This crate uses ``#![forbid(unsafe_code)]`` to ensure everything is implemented in 
100% Safe Rust. +## Development + +To test interoperability with Javascript, enable the `js_interop_tests` feature: + +```bash +cargo test --features js_interop_tests +``` + +Run benches with: + +```bash +cargo bench +``` + ## Contributing + Want to join us? Check out our ["Contributing" guide][contributing] and take a look at some of these issues: - [Issues labeled "good first issue"][good-first-issue] - [Issues labeled "help wanted"][help-wanted] -## References -- [github.com/mafintosh/hypercore](https://github.com/mafintosh/hypercore) - ## License + [MIT](./LICENSE-MIT) OR [Apache-2.0](./LICENSE-APACHE) [1]: https://img.shields.io/crates/v/hypercore.svg?style=flat-square [2]: https://crates.io/crates/hypercore -[3]: https://img.shields.io/travis/datrs/hypercore/master.svg?style=flat-square -[4]: https://travis-ci.org/datrs/hypercore +[3]: https://github.com/datrs/hypercore/actions/workflows/ci.yml/badge.svg +[4]: https://github.com/datrs/hypercore/actions [5]: https://img.shields.io/crates/d/hypercore.svg?style=flat-square [6]: https://crates.io/crates/hypercore [7]: https://img.shields.io/badge/docs-latest-blue.svg?style=flat-square diff --git a/benches/bench.rs b/benches/bench.rs deleted file mode 100644 index 5c113a83..00000000 --- a/benches/bench.rs +++ /dev/null @@ -1,58 +0,0 @@ -#![feature(test)] -extern crate test; - -use anyhow::Error; -use random_access_memory::RandomAccessMemory; -use test::Bencher; - -use hypercore::{Feed, Storage}; - -async fn create_feed(page_size: usize) -> Result, Error> { - let storage = Storage::new( - |_| Box::pin(async move { Ok(RandomAccessMemory::new(page_size)) }), - true, - ) - .await?; - Feed::with_storage(storage).await -} - -#[bench] -fn create(b: &mut Bencher) { - b.iter(|| { - async_std::task::block_on(async { - create_feed(1024).await.unwrap(); - }); - }); -} - -#[bench] -fn write(b: &mut Bencher) { - async_std::task::block_on(async { - let mut feed = create_feed(1024).await.unwrap(); - let data = Vec::from("hello"); 
- b.iter(|| { - async_std::task::block_on(async { - feed.append(&data).await.unwrap(); - }); - }); - }); -} - -#[bench] -fn read(b: &mut Bencher) { - async_std::task::block_on(async { - let mut feed = create_feed(1024).await.unwrap(); - let data = Vec::from("hello"); - for _ in 0..1000 { - feed.append(&data).await.unwrap(); - } - - let mut i = 0; - b.iter(|| { - async_std::task::block_on(async { - feed.get(i).await.unwrap(); - i += 1; - }); - }); - }); -} diff --git a/benches/disk.rs b/benches/disk.rs new file mode 100644 index 00000000..326f57b3 --- /dev/null +++ b/benches/disk.rs @@ -0,0 +1,140 @@ +use std::time::{Duration, Instant}; + +#[cfg(feature = "async-std")] +use criterion::async_executor::AsyncStdExecutor; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use hypercore::{Hypercore, HypercoreBuilder, HypercoreError, Storage}; +use random_access_disk::RandomAccessDisk; +use tempfile::Builder as TempfileBuilder; + +fn bench_create_disk(c: &mut Criterion) { + let mut group = c.benchmark_group("slow_call"); + group.measurement_time(Duration::from_secs(20)); + + #[cfg(feature = "async-std")] + group.bench_function("create_disk", move |b| { + b.to_async(AsyncStdExecutor) + .iter(|| create_hypercore("create")); + }); + #[cfg(feature = "tokio")] + group.bench_function("create_disk", move |b| { + let rt = tokio::runtime::Runtime::new().unwrap(); + b.to_async(&rt).iter(|| create_hypercore("create")); + }); +} + +#[cfg(feature = "cache")] +async fn create_hypercore(name: &str) -> Result, HypercoreError> { + let dir = TempfileBuilder::new() + .prefix(name) + .tempdir() + .unwrap() + .into_path(); + let storage = Storage::new_disk(&dir, true).await?; + HypercoreBuilder::new(storage) + .node_cache_options(hypercore::CacheOptionsBuilder::new()) + .build() + .await +} + +#[cfg(not(feature = "cache"))] +async fn create_hypercore(name: &str) -> Result, HypercoreError> { + let dir = TempfileBuilder::new() + .prefix(name) + .tempdir() + .unwrap() + 
.into_path(); + let storage = Storage::new_disk(&dir, true).await?; + HypercoreBuilder::new(storage).build().await +} + +fn bench_write_disk(c: &mut Criterion) { + let mut group = c.benchmark_group("slow_call"); + group.measurement_time(Duration::from_secs(20)); + + #[cfg(feature = "async-std")] + group.bench_function("write disk", |b| { + b.to_async(AsyncStdExecutor).iter_custom(write_disk); + }); + #[cfg(feature = "tokio")] + group.bench_function("write disk", |b| { + let rt = tokio::runtime::Runtime::new().unwrap(); + b.to_async(&rt).iter_custom(write_disk); + }); +} + +async fn write_disk(iters: u64) -> Duration { + let mut hypercore = create_hypercore("write").await.unwrap(); + let data = Vec::from("hello"); + let start = Instant::now(); + for _ in 0..iters { + black_box(hypercore.append(&data).await.unwrap()); + } + start.elapsed() +} + +fn bench_read_disk(c: &mut Criterion) { + let mut group = c.benchmark_group("slow_call"); + group.measurement_time(Duration::from_secs(20)); + + #[cfg(feature = "async-std")] + group.bench_function("read disk", |b| { + b.to_async(AsyncStdExecutor).iter_custom(read_disk); + }); + #[cfg(feature = "tokio")] + group.bench_function("read disk", |b| { + let rt = tokio::runtime::Runtime::new().unwrap(); + b.to_async(&rt).iter_custom(read_disk); + }); +} + +async fn read_disk(iters: u64) -> Duration { + let mut hypercore = create_hypercore("read").await.unwrap(); + let data = Vec::from("hello"); + for _ in 0..iters { + hypercore.append(&data).await.unwrap(); + } + let start = Instant::now(); + for i in 0..iters { + black_box(hypercore.get(i).await.unwrap()); + } + start.elapsed() +} + +fn bench_clear_disk(c: &mut Criterion) { + let mut group = c.benchmark_group("slow_call"); + group.measurement_time(Duration::from_secs(20)); + + #[cfg(feature = "async-std")] + group.bench_function("clear disk", |b| { + b.to_async(AsyncStdExecutor).iter_custom(clear_disk); + }); + #[cfg(feature = "tokio")] + group.bench_function("clear disk", |b| { + 
let rt = tokio::runtime::Runtime::new().unwrap(); + b.to_async(&rt).iter_custom(clear_disk); + }); +} + +#[allow(clippy::unit_arg)] +async fn clear_disk(iters: u64) -> Duration { + let mut hypercore = create_hypercore("clear").await.unwrap(); + let data = Vec::from("hello"); + for _ in 0..iters { + hypercore.append(&data).await.unwrap(); + } + let start = Instant::now(); + for i in 0..iters { + black_box(hypercore.clear(i, 1).await.unwrap()); + } + start.elapsed() +} + +criterion_group!( + benches, + bench_create_disk, + bench_write_disk, + bench_read_disk, + bench_clear_disk +); +criterion_main!(benches); diff --git a/benches/memory.rs b/benches/memory.rs new file mode 100644 index 00000000..b439b1e1 --- /dev/null +++ b/benches/memory.rs @@ -0,0 +1,128 @@ +use std::time::{Duration, Instant}; + +#[cfg(feature = "async-std")] +use criterion::async_executor::AsyncStdExecutor; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use hypercore::{Hypercore, HypercoreBuilder, HypercoreError, Storage}; +use random_access_memory::RandomAccessMemory; + +fn bench_create_memory(c: &mut Criterion) { + #[cfg(feature = "async-std")] + c.bench_function("create memory", |b| { + b.to_async(AsyncStdExecutor).iter(|| create_hypercore(1024)); + }); + #[cfg(feature = "tokio")] + c.bench_function("create memory", |b| { + let rt = tokio::runtime::Runtime::new().unwrap(); + b.to_async(&rt).iter(|| create_hypercore(1024)); + }); +} + +#[cfg(feature = "cache")] +async fn create_hypercore( + page_size: usize, +) -> Result, HypercoreError> { + let storage = Storage::open( + |_| Box::pin(async move { Ok(RandomAccessMemory::new(page_size)) }), + false, + ) + .await?; + HypercoreBuilder::new(storage) + .node_cache_options(hypercore::CacheOptionsBuilder::new()) + .build() + .await +} + +#[cfg(not(feature = "cache"))] +async fn create_hypercore( + page_size: usize, +) -> Result, HypercoreError> { + let storage = Storage::open( + |_| Box::pin(async move { 
Ok(RandomAccessMemory::new(page_size)) }), + false, + ) + .await?; + HypercoreBuilder::new(storage).build().await +} + +fn bench_write_memory(c: &mut Criterion) { + #[cfg(feature = "async-std")] + c.bench_function("write memory", |b| { + b.to_async(AsyncStdExecutor).iter_custom(write_memory); + }); + #[cfg(feature = "tokio")] + c.bench_function("write memory", |b| { + let rt = tokio::runtime::Runtime::new().unwrap(); + b.to_async(&rt).iter_custom(write_memory); + }); +} + +async fn write_memory(iters: u64) -> Duration { + let mut hypercore = create_hypercore(1024).await.unwrap(); + let data = Vec::from("hello"); + let start = Instant::now(); + for _ in 0..iters { + black_box(hypercore.append(&data).await.unwrap()); + } + start.elapsed() +} + +fn bench_read_memory(c: &mut Criterion) { + #[cfg(feature = "async-std")] + c.bench_function("read memory", |b| { + b.to_async(AsyncStdExecutor).iter_custom(read_memory); + }); + #[cfg(feature = "tokio")] + c.bench_function("read memory", |b| { + let rt = tokio::runtime::Runtime::new().unwrap(); + b.to_async(&rt).iter_custom(read_memory); + }); +} + +async fn read_memory(iters: u64) -> Duration { + let mut hypercore = create_hypercore(1024).await.unwrap(); + let data = Vec::from("hello"); + for _ in 0..iters { + hypercore.append(&data).await.unwrap(); + } + let start = Instant::now(); + for i in 0..iters { + black_box(hypercore.get(i).await.unwrap()); + } + start.elapsed() +} + +fn bench_clear_memory(c: &mut Criterion) { + #[cfg(feature = "async-std")] + c.bench_function("clear memory", |b| { + b.to_async(AsyncStdExecutor).iter_custom(clear_memory); + }); + #[cfg(feature = "tokio")] + c.bench_function("clear memory", |b| { + let rt = tokio::runtime::Runtime::new().unwrap(); + b.to_async(&rt).iter_custom(clear_memory); + }); +} + +#[allow(clippy::unit_arg)] +async fn clear_memory(iters: u64) -> Duration { + let mut hypercore = create_hypercore(1024).await.unwrap(); + let data = Vec::from("hello"); + for _ in 0..iters { + 
hypercore.append(&data).await.unwrap(); + } + let start = Instant::now(); + for i in 0..iters { + black_box(hypercore.clear(i, 1).await.unwrap()); + } + start.elapsed() +} + +criterion_group!( + benches, + bench_create_memory, + bench_write_memory, + bench_read_memory, + bench_clear_memory +); +criterion_main!(benches); diff --git a/examples/async.rs b/examples/async.rs deleted file mode 100644 index a052bfe2..00000000 --- a/examples/async.rs +++ /dev/null @@ -1,29 +0,0 @@ -use async_std::task; -use hypercore::Feed; -use random_access_storage::RandomAccess; -use std::fmt::Debug; - -async fn append(feed: &mut Feed, content: &[u8]) -where - T: RandomAccess> + Debug + Send, -{ - feed.append(content).await.unwrap(); -} - -async fn print(feed: &mut Feed) -where - T: RandomAccess> + Debug + Send, -{ - println!("{:?}", feed.get(0).await); - println!("{:?}", feed.get(1).await); -} - -fn main() { - task::block_on(task::spawn(async { - let mut feed = Feed::default(); - - append(&mut feed, b"hello").await; - append(&mut feed, b"world").await; - print(&mut feed).await; - })); -} diff --git a/examples/disk.rs b/examples/disk.rs new file mode 100644 index 00000000..99990897 --- /dev/null +++ b/examples/disk.rs @@ -0,0 +1,88 @@ +#[cfg(feature = "async-std")] +use async_std::main as async_main; +use hypercore::{HypercoreBuilder, HypercoreError, Storage}; +use tempfile::Builder; +#[cfg(feature = "tokio")] +use tokio::main as async_main; + +/// Example about using a disk hypercore. +#[async_main] +async fn main() { + // For the purposes of this example, first create a + // temporary directory to hold hypercore. + let dir = Builder::new() + .prefix("examples_disk") + .tempdir() + .unwrap() + .into_path(); + + // Create a disk storage, overwriting existing values. 
+ let overwrite = true; + let storage = Storage::new_disk(&dir, overwrite) + .await + .expect("Could not create disk storage"); + + // Build a new disk hypercore + let mut hypercore = HypercoreBuilder::new(storage) + .build() + .await + .expect("Could not create disk hypercore"); + + // Append values to the hypercore + hypercore.append(b"Hello, ").await.unwrap(); + hypercore.append(b"from ").await.unwrap(); + + // Close hypercore + drop(hypercore); + + // Open hypercore again from same directory, not + // overwriting. + let overwrite = false; + let storage = Storage::new_disk(&dir, overwrite) + .await + .expect("Could not open existing disk storage"); + let mut hypercore = HypercoreBuilder::new(storage) + .open(true) + .build() + .await + .expect("Could not open disk hypercore"); + + // Append new values to the hypercore + hypercore.append(b"disk hypercore!").await.unwrap(); + + // Add three values and clear the first two + let batch: &[&[u8]] = &[ + b"first value to clear", + b"second value to clear", + b"third value to keep", + ]; + let new_length = hypercore.append_batch(batch).await.unwrap().length; + hypercore + .clear(new_length - 3, new_length - 1) + .await + .unwrap(); + + // The two values return None, but the last one returns correctly + assert!(hypercore.get(3).await.unwrap().is_none()); + assert!(hypercore.get(4).await.unwrap().is_none()); + assert_eq!( + hypercore.get(5).await.unwrap().unwrap(), + b"third value to keep" + ); + + // Print the first three values, converting binary back to string + println!( + "{}{}{}", + format_res(hypercore.get(0).await), + format_res(hypercore.get(1).await), + format_res(hypercore.get(2).await) + ); // prints "Hello, from disk hypercore!" 
+} + +fn format_res(res: Result>, HypercoreError>) -> String { + match res { + Ok(Some(bytes)) => String::from_utf8(bytes).expect("Shouldn't fail in example"), + Ok(None) => "Got None in feed".to_string(), + Err(e) => format!("Error getting value from feed, reason = {e:?}"), + } +} diff --git a/examples/iter.rs b/examples/iter.rs deleted file mode 100644 index f31e0f6c..00000000 --- a/examples/iter.rs +++ /dev/null @@ -1,80 +0,0 @@ -use std::iter; - -#[derive(Debug)] -struct Book { - pub title: String, -} - -#[derive(Debug)] -struct BookShelf { - pub books: Vec, -} - -#[derive(Debug)] -struct BookShelfIterator<'b> { - /// Keeps track which index we're currently at. - pub cursor: u64, - /// Borrow of the Bookshelf we're going to iterate over. - pub inner: &'b BookShelf, -} - -impl BookShelf { - /// Return an iterator over all values. - pub fn iter(&self) -> BookShelfIterator<'_> { - BookShelfIterator { - inner: self, - cursor: 0, - } - } -} - -impl<'b> iter::Iterator for BookShelfIterator<'b> { - type Item = &'b Book; - - fn next(&mut self) -> Option { - let cursor = self.cursor; - self.cursor += 1; - - if cursor >= self.inner.books.len() as u64 { - None - } else { - Some(&self.inner.books[cursor as usize]) - } - } -} - -impl<'b> iter::IntoIterator for &'b BookShelf { - type Item = &'b Book; - type IntoIter = BookShelfIterator<'b>; - - fn into_iter(self) -> Self::IntoIter { - Self::IntoIter { - cursor: 0, - inner: self, - } - } -} - -fn main() { - let library = BookShelf { - books: vec![ - Book { - title: "Das Kapital I".into(), - }, - Book { - title: "Das Kapital II".into(), - }, - Book { - title: "Das Kapital III".into(), - }, - ], - }; - - for book in library.iter() { - println!("book {}", book.title); - } - - for book in &library { - println!("book {}", book.title); - } -} diff --git a/examples/main.rs b/examples/main.rs deleted file mode 100644 index d711ecc6..00000000 --- a/examples/main.rs +++ /dev/null @@ -1,29 +0,0 @@ -use hypercore::Feed; - 
-#[async_std::main] -async fn main() { - let mut feed = Feed::open("feed.db").await.expect("Failed to create dir"); - - feed.append(b"hello").await.unwrap(); - feed.append(b"world").await.unwrap(); - - drop(feed); - - let mut feed = Feed::open("feed.db").await.expect("Failed to create dir"); - - feed.append(b"welcome").await.unwrap(); - feed.append(b"back").await.unwrap(); - - println!("{:?}", format_res(feed.get(0).await)); // prints "hello" - println!("{:?}", format_res(feed.get(1).await)); // prints "world" - println!("{:?}", format_res(feed.get(2).await)); // prints "welcome" - println!("{:?}", format_res(feed.get(3).await)); // prints "back" -} - -fn format_res(res: anyhow::Result>>) -> String { - match res { - Ok(Some(bytes)) => String::from_utf8(bytes).expect("Shouldnt fail in example"), - Ok(None) => "Got None in feed".to_string(), - Err(e) => format!("Error getting value from feed, reason = {}", e), - } -} diff --git a/examples/memory.rs b/examples/memory.rs new file mode 100644 index 00000000..a510ed6d --- /dev/null +++ b/examples/memory.rs @@ -0,0 +1,59 @@ +#[cfg(feature = "async-std")] +use async_std::main as async_main; +use hypercore::{HypercoreBuilder, HypercoreError, Storage}; +#[cfg(feature = "tokio")] +use tokio::main as async_main; + +/// Example about using an in-memory hypercore. 
+#[async_main] +async fn main() { + // Create a memory storage + let storage = Storage::new_memory() + .await + .expect("Could not create memory storage"); + + // Build hypercore + let mut hypercore = HypercoreBuilder::new(storage) + .build() + .await + .expect("Could not create memory hypercore"); + + // Append values + hypercore.append(b"Hello, ").await.unwrap(); + hypercore.append(b"from memory hypercore!").await.unwrap(); + + // Add three values and clear the first two + let batch: &[&[u8]] = &[ + b"first value to clear", + b"second value to clear", + b"third value to keep", + ]; + let new_length = hypercore.append_batch(batch).await.unwrap().length; + hypercore + .clear(new_length - 3, new_length - 1) + .await + .unwrap(); + + // The two values return None, but the last one returns correctly + assert!(hypercore.get(2).await.unwrap().is_none()); + assert!(hypercore.get(3).await.unwrap().is_none()); + assert_eq!( + hypercore.get(4).await.unwrap().unwrap(), + b"third value to keep" + ); + + // Print values, converting binary back to string + println!( + "{}{}", + format_res(hypercore.get(0).await), + format_res(hypercore.get(1).await) + ); // prints "Hello, from memory hypercore!" 
+} + +fn format_res(res: Result<Option<Vec<u8>>, HypercoreError>) -> String { + match res { + Ok(Some(bytes)) => String::from_utf8(bytes).expect("Shouldn't fail in example"), + Ok(None) => "Got None in feed".to_string(), + Err(e) => format!("Error getting value from feed, reason = {e:?}"), + } +} diff --git a/examples/replication.rs b/examples/replication.rs new file mode 100644 index 00000000..52c205ac --- /dev/null +++ b/examples/replication.rs @@ -0,0 +1,116 @@ +#[cfg(feature = "async-std")] +use async_std::main as async_main; +use hypercore::{ + Hypercore, HypercoreBuilder, HypercoreError, PartialKeypair, RequestBlock, RequestUpgrade, + Storage, +}; +use random_access_disk::RandomAccessDisk; +use random_access_memory::RandomAccessMemory; +use tempfile::Builder; +#[cfg(feature = "tokio")] +use tokio::main as async_main; + +/// Example on how to replicate a (disk) hypercore to another (memory) hypercore. +/// NB: The replication functions used here are low-level, built for use in the wire +/// protocol. +#[async_main] +async fn main() { + // For the purposes of this example, first create a + // temporary directory to hold hypercore. + let dir = Builder::new() + .prefix("examples_replication") + .tempdir() + .unwrap() + .into_path(); + + // Create a disk storage, overwriting existing values. 
+ let overwrite = true; + let storage = Storage::new_disk(&dir, overwrite) + .await + .expect("Could not create disk storage"); + + // Build a new disk hypercore + let mut origin_hypercore = HypercoreBuilder::new(storage) + .build() + .await + .expect("Could not create disk hypercore"); + + // Append values to the hypercore + let batch: &[&[u8]] = &[b"Hello, ", b"from ", b"replicated ", b"hypercore!"]; + origin_hypercore.append_batch(batch).await.unwrap(); + + // Store the public key + let origin_public_key = origin_hypercore.key_pair().public; + + // Create a peer of the origin hypercore using the public key + let mut replicated_hypercore = HypercoreBuilder::new( + Storage::new_memory() + .await + .expect("Could not create memory storage"), + ) + .key_pair(PartialKeypair { + public: origin_public_key, + secret: None, + }) + .build() + .await + .expect("Could not create memory hypercore"); + + // Replicate the four values in random order + replicate_index(&mut origin_hypercore, &mut replicated_hypercore, 3).await; + replicate_index(&mut origin_hypercore, &mut replicated_hypercore, 0).await; + replicate_index(&mut origin_hypercore, &mut replicated_hypercore, 2).await; + replicate_index(&mut origin_hypercore, &mut replicated_hypercore, 1).await; + + // Print values from replicated hypercore, converting binary back to string + println!( + "{}{}{}{}", + format_res(replicated_hypercore.get(0).await), + format_res(replicated_hypercore.get(1).await), + format_res(replicated_hypercore.get(2).await), + format_res(replicated_hypercore.get(3).await) + ); // prints "Hello, from replicated hypercore!" 
+} + +async fn replicate_index( + origin_hypercore: &mut Hypercore, + replicated_hypercore: &mut Hypercore, + request_index: u64, +) { + let missing_nodes = origin_hypercore + .missing_nodes(request_index) + .await + .expect("Could not get missing nodes"); + let upgrade_start = replicated_hypercore.info().contiguous_length; + let upgrade_length = origin_hypercore.info().contiguous_length - upgrade_start; + + let proof = origin_hypercore + .create_proof( + Some(RequestBlock { + index: request_index, + nodes: missing_nodes, + }), + None, + None, + Some(RequestUpgrade { + start: upgrade_start, + length: upgrade_length, + }), + ) + .await + .expect("Creating proof error") + .expect("Could not get proof"); + // Then the proof is verified and applied to the replicated party. + assert!(replicated_hypercore + .verify_and_apply_proof(&proof) + .await + .expect("Verifying and applying proof failed")); +} + +fn format_res(res: Result>, HypercoreError>) -> String { + match res { + Ok(Some(bytes)) => String::from_utf8(bytes).expect("Shouldn't fail in example"), + Ok(None) => "Got None in feed".to_string(), + Err(e) => format!("Error getting value from feed, reason = {e:?}"), + } +} diff --git a/src/audit.rs b/src/audit.rs deleted file mode 100644 index ef6f9a43..00000000 --- a/src/audit.rs +++ /dev/null @@ -1,20 +0,0 @@ -/// The audit report for a feed, created by the `.audit()` method. -#[derive(Debug, PartialEq, Clone)] -pub struct Audit { - /// The number of valid blocks identified - pub valid_blocks: u64, - /// The number of invalid blocks identified - pub invalid_blocks: u64, -} - -impl Audit { - /// Access the `valid_blocks` field from the proof. - pub fn valid_blocks(&self) -> u64 { - self.valid_blocks - } - - /// Access the `invalid_blocks` field from the proof. 
- pub fn invalid_blocks(&self) -> u64 { - self.invalid_blocks - } -} diff --git a/src/bitfield/dynamic.rs b/src/bitfield/dynamic.rs new file mode 100644 index 00000000..6c827c47 --- /dev/null +++ b/src/bitfield/dynamic.rs @@ -0,0 +1,403 @@ +use super::fixed::{FixedBitfield, FIXED_BITFIELD_BITS_LENGTH, FIXED_BITFIELD_LENGTH}; +use crate::{ + common::{BitfieldUpdate, StoreInfo, StoreInfoInstruction, StoreInfoType}, + Store, +}; +use futures::future::Either; +use std::{cell::RefCell, convert::TryInto}; + +const DYNAMIC_BITFIELD_PAGE_SIZE: usize = 32768; + +/// Dynamic sized bitfield, uses a map of `FixedBitfield` elements. +/// See: +/// https://github.com/hypercore-protocol/hypercore/blob/master/lib/bitfield.js +/// for reference. +#[derive(Debug)] +pub(crate) struct DynamicBitfield { + pages: intmap::IntMap>, + biggest_page_index: u64, + unflushed: Vec, +} + +impl DynamicBitfield { + pub(crate) fn open(info: Option) -> Either { + match info { + None => Either::Left(StoreInfoInstruction::new_size(Store::Bitfield, 0)), + Some(info) => { + if info.info_type == StoreInfoType::Size { + let bitfield_store_length = info.length.unwrap(); + // Read only multiples of 4 bytes. 
+ let length = bitfield_store_length - (bitfield_store_length & 3); + return Either::Left(StoreInfoInstruction::new_content( + Store::Bitfield, + 0, + length, + )); + } + let data = info.data.expect("Did not receive bitfield store content"); + let resumed = data.len() >= 4; + let mut biggest_page_index = 0; + if resumed { + let mut pages: intmap::IntMap> = intmap::IntMap::new(); + let mut data_index = 0; + while data_index < data.len() { + let parent_index: u64 = (data_index / FIXED_BITFIELD_LENGTH) as u64; + pages.insert( + parent_index, + RefCell::new(FixedBitfield::from_data(data_index, &data)), + ); + if parent_index > biggest_page_index { + biggest_page_index = parent_index; + } + data_index += FIXED_BITFIELD_LENGTH; + } + Either::Right(Self { + pages, + unflushed: vec![], + biggest_page_index, + }) + } else { + Either::Right(Self { + pages: intmap::IntMap::new(), + unflushed: vec![], + biggest_page_index, + }) + } + } + } + } + + /// Flushes pending changes, returns info slices to write to storage. 
+ pub(crate) fn flush(&mut self) -> Box<[StoreInfo]> { + let mut infos_to_flush: Vec = Vec::with_capacity(self.unflushed.len()); + for unflushed_id in &self.unflushed { + let mut p = self.pages.get_mut(*unflushed_id).unwrap().borrow_mut(); + let data = p.to_bytes(); + infos_to_flush.push(StoreInfo::new_content( + Store::Bitfield, + *unflushed_id * data.len() as u64, + &data, + )); + p.dirty = false; + } + self.unflushed = vec![]; + infos_to_flush.into_boxed_slice() + } + + pub(crate) fn get(&self, index: u64) -> bool { + let j = index & (DYNAMIC_BITFIELD_PAGE_SIZE as u64 - 1); + let i = (index - j) / DYNAMIC_BITFIELD_PAGE_SIZE as u64; + + if !self.pages.contains_key(i) { + false + } else { + let p = self.pages.get(i).unwrap().borrow(); + p.get(j.try_into().expect("Index should have fit into u32")) + } + } + + #[allow(dead_code)] + pub(crate) fn set(&mut self, index: u64, value: bool) -> bool { + let j = index & (DYNAMIC_BITFIELD_PAGE_SIZE as u64 - 1); + let i = (index - j) / DYNAMIC_BITFIELD_PAGE_SIZE as u64; + + if !self.pages.contains_key(i) { + if value { + self.pages.insert(i, RefCell::new(FixedBitfield::new())); + if i > self.biggest_page_index { + self.biggest_page_index = i; + } + } else { + // The page does not exist, but when setting false, that doesn't matter + return false; + } + } + + let mut p = self.pages.get_mut(i).unwrap().borrow_mut(); + let changed: bool = p.set(j.try_into().expect("Index should have fit into u32"), value); + + if changed && !p.dirty { + p.dirty = true; + self.unflushed.push(i); + } + changed + } + + pub(crate) fn update(&mut self, bitfield_update: &BitfieldUpdate) { + self.set_range( + bitfield_update.start, + bitfield_update.length, + !bitfield_update.drop, + ) + } + + pub(crate) fn set_range(&mut self, start: u64, length: u64, value: bool) { + let mut j = start & (DYNAMIC_BITFIELD_PAGE_SIZE as u64 - 1); + let mut i = (start - j) / (DYNAMIC_BITFIELD_PAGE_SIZE as u64); + let mut length = length; + + while length > 0 { + if 
!self.pages.contains_key(i) { + self.pages.insert(i, RefCell::new(FixedBitfield::new())); + if i > self.biggest_page_index { + self.biggest_page_index = i; + } + } + let mut p = self.pages.get_mut(i).unwrap().borrow_mut(); + + let end = std::cmp::min(j + length, DYNAMIC_BITFIELD_PAGE_SIZE as u64); + + let range_start: u32 = j + .try_into() + .expect("Range start should have fit into a u32"); + let range_end: u32 = (end - j) + .try_into() + .expect("Range end should have fit into a u32"); + + let changed = p.set_range(range_start, range_end, value); + if changed && !p.dirty { + p.dirty = true; + self.unflushed.push(i); + } + + j = 0; + i += 1; + length -= range_end as u64; + } + } + + /// Finds the first index of the value after given position. Returns None if not found. + pub(crate) fn index_of(&self, value: bool, position: u64) -> Option { + let first_index = position & (DYNAMIC_BITFIELD_PAGE_SIZE as u64 - 1); + let first_page = (position - first_index) / (DYNAMIC_BITFIELD_PAGE_SIZE as u64); + + if value { + // For finding the first positive value, we only care about pages that are set, + // not pages that don't exist, as they can't possibly contain the value. + + // To keep the common case fast, first try the same page as the position + if let Some(p) = self.pages.get(first_page) { + if let Some(index) = p.borrow().index_of(value, first_index as u32) { + return Some(first_page * DYNAMIC_BITFIELD_PAGE_SIZE as u64 + index as u64); + }; + } + + // It wasn't found on the first page, now get the keys that are bigger + // than the given index and sort them. + let mut keys: Vec<&u64> = self.pages.keys().filter(|key| **key > first_page).collect(); + keys.sort(); + for key in keys { + if let Some(p) = self.pages.get(*key) { + if let Some(index) = p.borrow().index_of(value, 0) { + return Some(key * DYNAMIC_BITFIELD_PAGE_SIZE as u64 + index as u64); + }; + } + } + } else { + // Searching for the false value is easier as it is automatically hit on + // a missing page. 
+ let mut i = first_page; + let mut j = first_index as u32; + while i == first_page || i <= self.biggest_page_index { + if let Some(p) = self.pages.get(i) { + if let Some(index) = p.borrow().index_of(value, j) { + return Some(i * DYNAMIC_BITFIELD_PAGE_SIZE as u64 + index as u64); + }; + } else { + return Some(i * DYNAMIC_BITFIELD_PAGE_SIZE as u64 + j as u64); + } + i += 1; + j = 0; // We start at the beginning of each page + } + } + None + } + + /// Finds the last index of the value before given position. Returns None if not found. + pub(crate) fn last_index_of(&self, value: bool, position: u64) -> Option { + let last_index = position & (DYNAMIC_BITFIELD_PAGE_SIZE as u64 - 1); + let last_page = (position - last_index) / (DYNAMIC_BITFIELD_PAGE_SIZE as u64); + + if value { + // For finding the last positive value, we only care about pages that are set, + // not pages that don't exist, as they can't possibly contain the value. + + // To keep the common case fast, first try the same page as the position + if let Some(p) = self.pages.get(last_page) { + if let Some(index) = p.borrow().last_index_of(value, last_index as u32) { + return Some(last_page * DYNAMIC_BITFIELD_PAGE_SIZE as u64 + index as u64); + }; + } + + // It wasn't found on the last page, now get the keys that are smaller + // than the given index and sort them. + let mut keys: Vec<&u64> = self.pages.keys().filter(|key| **key < last_page).collect(); + keys.sort(); + keys.reverse(); + + for key in keys { + if let Some(p) = self.pages.get(*key) { + if let Some(index) = p + .borrow() + .last_index_of(value, FIXED_BITFIELD_BITS_LENGTH as u32 - 1) + { + return Some(key * DYNAMIC_BITFIELD_PAGE_SIZE as u64 + index as u64); + }; + } + } + } else { + // Searching for the false value is easier as it is automatically hit on + // a missing page. 
+ let mut i = last_page; + let mut j = last_index as u32; + while i == last_page || i == 0 { + if let Some(p) = self.pages.get(i) { + if let Some(index) = p.borrow().last_index_of(value, j) { + return Some(i * DYNAMIC_BITFIELD_PAGE_SIZE as u64 + index as u64); + }; + } else { + return Some(i * DYNAMIC_BITFIELD_PAGE_SIZE as u64 + j as u64); + } + i -= 1; + j = FIXED_BITFIELD_BITS_LENGTH as u32 - 1; // We start at end of each page + } + } + + None + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn assert_value_range(bitfield: &DynamicBitfield, start: u64, length: u64, value: bool) { + for i in start..start + length { + assert_eq!(bitfield.get(i), value); + } + } + + fn get_dynamic_bitfield() -> DynamicBitfield { + match DynamicBitfield::open(Some(StoreInfo::new_content(Store::Bitfield, 0, &[]))) { + Either::Left(_) => panic!("Could not open bitfield"), + Either::Right(bitfield) => bitfield, + } + } + + #[test] + fn bitfield_dynamic_get_and_set() { + let mut bitfield = get_dynamic_bitfield(); + assert_value_range(&bitfield, 0, 9, false); + assert_eq!(bitfield.index_of(true, 0), None); + assert_eq!(bitfield.index_of(false, 0), Some(0)); + assert_eq!(bitfield.last_index_of(true, 9), None); + assert_eq!(bitfield.last_index_of(false, 9), Some(9)); + assert_eq!(bitfield.index_of(true, 10000000), None); + assert_eq!(bitfield.index_of(false, 10000000), Some(10000000)); + assert_eq!(bitfield.last_index_of(true, 10000000), None); + assert_eq!(bitfield.last_index_of(false, 10000000), Some(10000000)); + + bitfield.set(0, true); + assert!(bitfield.get(0)); + assert_eq!(bitfield.index_of(true, 0), Some(0)); + assert_eq!(bitfield.index_of(false, 0), Some(1)); + assert_eq!(bitfield.last_index_of(true, 9), Some(0)); + assert_eq!(bitfield.last_index_of(false, 9), Some(9)); + assert_eq!(bitfield.last_index_of(true, 10000000), Some(0)); + assert_eq!(bitfield.last_index_of(false, 10000000), Some(10000000)); + + assert_value_range(&bitfield, 1, 63, false); + bitfield.set(31, 
true); + assert!(bitfield.get(31)); + + assert_value_range(&bitfield, 32, 32, false); + assert!(!bitfield.get(32)); + bitfield.set(32, true); + assert!(bitfield.get(32)); + assert_value_range(&bitfield, 33, 31, false); + + assert_value_range(&bitfield, 32760, 8, false); + assert!(!bitfield.get(32767)); + bitfield.set(32767, true); + assert!(bitfield.get(32767)); + assert_value_range(&bitfield, 32760, 7, false); + + // Now for over one fixed bitfield values + bitfield.set(32768, true); + assert_value_range(&bitfield, 32767, 2, true); + assert_value_range(&bitfield, 32769, 9, false); + + bitfield.set(10000000, true); + assert!(bitfield.get(10000000)); + assert_value_range(&bitfield, 9999990, 10, false); + assert_value_range(&bitfield, 10000001, 9, false); + assert_eq!(bitfield.index_of(false, 32767), Some(32769)); + assert_eq!(bitfield.index_of(true, 32769), Some(10000000)); + assert_eq!(bitfield.last_index_of(true, 9999999), Some(32768)); + } + + #[test] + fn bitfield_dynamic_set_range() { + let mut bitfield = get_dynamic_bitfield(); + bitfield.set_range(0, 2, true); + assert_value_range(&bitfield, 0, 2, true); + assert_value_range(&bitfield, 3, 61, false); + + bitfield.set_range(2, 3, true); + assert_value_range(&bitfield, 0, 5, true); + assert_value_range(&bitfield, 5, 59, false); + + bitfield.set_range(1, 3, false); + assert!(bitfield.get(0)); + assert_value_range(&bitfield, 1, 3, false); + assert_value_range(&bitfield, 4, 1, true); + assert_value_range(&bitfield, 5, 59, false); + + bitfield.set_range(30, 30070, true); + assert_value_range(&bitfield, 5, 25, false); + assert_value_range(&bitfield, 30, 100, true); + assert_value_range(&bitfield, 30050, 50, true); + assert_value_range(&bitfield, 31000, 50, false); + + bitfield.set_range(32750, 18, true); + assert_value_range(&bitfield, 32750, 18, true); + + bitfield.set_range(32765, 3, false); + assert_value_range(&bitfield, 32750, 15, true); + assert_value_range(&bitfield, 32765, 3, false); + + // Now for over one 
pub(crate) const FIXED_BITFIELD_LENGTH: usize = 1024;
pub(crate) const FIXED_BITFIELD_BYTES_LENGTH: usize = FIXED_BITFIELD_LENGTH * 4;
pub(crate) const FIXED_BITFIELD_BITS_LENGTH: usize = FIXED_BITFIELD_BYTES_LENGTH * 8;
// u32 has 4 bytes and a byte has 8 bits
const FIXED_BITFIELD_BITS_PER_ELEM: u32 = 4 * 8;

use std::convert::TryInto;

/// Fixed size bitfield
/// see:
/// https://github.com/holepunchto/bits-to-bytes/blob/main/index.js
/// for implementations.
/// TODO: This has been split into segments on the Javascript side "for improved disk performance":
/// https://github.com/hypercore-protocol/hypercore/commit/6392021b11d53041a446e9021c7d79350a052d3d
#[derive(Debug)]
pub(crate) struct FixedBitfield {
    /// Set by the owner when the page has changes that are not yet flushed.
    pub(crate) dirty: bool,
    /// The bits, packed 32 per element; bit `n` lives in element `n / 32`.
    bitfield: [u32; FIXED_BITFIELD_LENGTH],
}

impl FixedBitfield {
    /// Creates a clean bitfield with every bit unset.
    pub(crate) fn new() -> Self {
        Self {
            dirty: false,
            bitfield: [0; FIXED_BITFIELD_LENGTH],
        }
    }

    /// Restores a bitfield from serialized bytes.
    ///
    /// Reads up to `FIXED_BITFIELD_BYTES_LENGTH` bytes of `data` starting at
    /// `data_index` as little-endian `u32` words. The word found at
    /// `data_index` becomes element 0, so the offset only selects where reading
    /// starts. Trailing bytes that do not fill a whole word are ignored, and a
    /// `data_index` beyond the end of `data` yields an empty bitfield.
    pub(crate) fn from_data(data_index: usize, data: &[u8]) -> Self {
        let mut bitfield = [0u32; FIXED_BITFIELD_LENGTH];
        if let Some(page_bytes) = data.get(data_index..) {
            // `zip` stops at whichever runs out first: the words of this page or
            // the complete 4-byte groups available in the data.
            for (word, chunk) in bitfield.iter_mut().zip(page_bytes.chunks_exact(4)) {
                *word = u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
            }
        }
        Self {
            dirty: false,
            bitfield,
        }
    }

    /// Serializes all elements as little-endian bytes
    /// (`FIXED_BITFIELD_BYTES_LENGTH` bytes in total).
    pub(crate) fn to_bytes(&self) -> Box<[u8]> {
        let mut data = [0u8; FIXED_BITFIELD_BYTES_LENGTH];
        for (chunk, elem) in data.chunks_exact_mut(4).zip(self.bitfield.iter()) {
            chunk.copy_from_slice(&elem.to_le_bytes());
        }
        data.into()
    }

    /// Splits a bit index into (element index, bit offset inside the element).
    fn locate(index: u32) -> (usize, u32) {
        let n = FIXED_BITFIELD_BITS_PER_ELEM;
        let element: usize = (index / n)
            .try_into()
            .expect("Could not fit 32 bit element index to usize on this architecture");
        (element, index & (n - 1))
    }

    /// Returns the bit at `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is not below `FIXED_BITFIELD_BITS_LENGTH`.
    pub(crate) fn get(&self, index: u32) -> bool {
        let (i, offset) = Self::locate(index);
        self.bitfield[i] & (1 << offset) != 0
    }

    /// Sets the bit at `index` to `value`, returning `true` if the bit changed.
    ///
    /// # Panics
    ///
    /// Panics if `index` is not below `FIXED_BITFIELD_BITS_LENGTH`.
    pub(crate) fn set(&mut self, index: u32, value: bool) -> bool {
        let (i, offset) = Self::locate(index);
        let mask = 1u32 << offset;

        if ((self.bitfield[i] & mask) != 0) == value {
            // Already in the requested state
            return false;
        }
        self.bitfield[i] ^= mask;
        true
    }

    /// Sets `length` bits starting at `start` to `value`, returning `true` if
    /// any bit changed.
    ///
    /// # Panics
    ///
    /// Panics if the range reaches past `FIXED_BITFIELD_BITS_LENGTH`.
    pub(crate) fn set_range(&mut self, start: u32, length: u32, value: bool) -> bool {
        let n = FIXED_BITFIELD_BITS_PER_ELEM;

        let mut remaining = length;
        let mut offset = start & (n - 1);
        let mut i: usize = (start / n).try_into().unwrap();

        let mut changed = false;

        while remaining > 0 {
            // Bits of the current element that belong to the range
            let count = std::cmp::min(remaining, n - offset);
            let mask: u32 = if count == n {
                // A whole element: shifting 1 by 32 would overflow, so go
                // directly to the all-ones value.
                u32::MAX
            } else {
                ((1u32 << count) - 1) << offset
            };

            if value {
                if (self.bitfield[i] & mask) != mask {
                    self.bitfield[i] |= mask;
                    changed = true;
                }
            } else if (self.bitfield[i] & mask) != 0 {
                self.bitfield[i] &= !mask;
                changed = true;
            }

            remaining -= count;
            offset = 0;
            i += 1;
        }

        changed
    }

    /// Finds the first index of the value after given position. Returns None if not found.
    pub(crate) fn index_of(&self, value: bool, position: u32) -> Option<u32> {
        (position..FIXED_BITFIELD_BITS_LENGTH as u32).find(|&i| self.get(i) == value)
    }

    /// Finds the last index of the value before given position. Returns None if not found.
    ///
    /// A `position` past the last bit is treated as the last bit, so the search
    /// never reads outside the bitfield.
    pub(crate) fn last_index_of(&self, value: bool, position: u32) -> Option<u32> {
        let last = std::cmp::min(position, FIXED_BITFIELD_BITS_LENGTH as u32 - 1);
        (0..=last).rev().find(|&i| self.get(i) == value)
    }
}
3, 61, false); + + bitfield.set_range(2, 3, true); + assert_value_range(&bitfield, 0, 5, true); + assert_value_range(&bitfield, 5, 59, false); + + bitfield.set_range(1, 3, false); + assert!(bitfield.get(0)); + assert_value_range(&bitfield, 1, 3, false); + assert_value_range(&bitfield, 4, 1, true); + assert_value_range(&bitfield, 5, 59, false); + + bitfield.set_range(30, 30070, true); + assert_value_range(&bitfield, 5, 25, false); + assert_value_range(&bitfield, 30, 100, true); + assert_value_range(&bitfield, 30050, 50, true); + assert_value_range(&bitfield, 31000, 50, false); + assert_eq!(bitfield.index_of(true, 20), Some(30)); + assert_eq!(bitfield.index_of(false, 30), Some(30100)); + assert_eq!(bitfield.last_index_of(true, 32000), Some(30099)); + assert_eq!(bitfield.last_index_of(false, 30099), Some(29)); + + bitfield.set_range(32750, 18, true); + assert_value_range(&bitfield, 32750, 18, true); + + bitfield.set_range(32765, 3, false); + assert_value_range(&bitfield, 32750, 15, true); + assert_value_range(&bitfield, 32765, 3, false); + } +} diff --git a/src/bitfield/iterator.rs b/src/bitfield/iterator.rs deleted file mode 100644 index 2c6d3d9a..00000000 --- a/src/bitfield/iterator.rs +++ /dev/null @@ -1,158 +0,0 @@ -//! Iterate over a bitfield. - -use super::Bitfield; - -/// Iterate over a bitfield. -#[derive(Debug)] -pub struct Iterator<'a> { - start: u64, - end: u64, - index_end: u64, - pos: Option, - byte: u8, - bitfield: &'a mut Bitfield, -} - -impl<'a> Iterator<'a> { - /// Create a new instance. - pub fn new(bitfield: &'a mut Bitfield) -> Self { - Self { - start: 0, - end: 0, - index_end: 0, - pos: Some(0), - byte: 0, - bitfield, - } - } - - /// Grow the bitfield if needed. 
- pub fn range(&mut self, start: u64, end: u64) { - self.start = start; - self.end = end; - self.index_end = 2 * ((end + 31) / 32); - - if self.end > self.bitfield.length { - self.bitfield.expand(self.end); - } - } - - /// Seek to `offset` - pub fn seek(&mut self, mut offset: u64) -> &mut Self { - offset += self.start; - // FIXME This is fishy. Offset and start is unsigned, so `offset < self.start` can only - // be true when the previous addition overflows. The overflow would cause a panic, so, - // either the addition should be a wrapping_add, or rather, the original offset should - // be checked to ensure it is less than `self.end - self.start`. - if offset < self.start { - offset = self.start; - } - - if offset >= self.end { - self.pos = None; - return self; - } - - let o = offset % 8; - - let pos = offset / 8; - self.pos = Some(pos); - - self.byte = self.bitfield.data.get_byte(pos as usize) - | self.bitfield.masks.data_iterate[o as usize]; - - self - } - - pub fn next(&mut self) -> Option { - let mut pos = self.pos?; - - let mut free = self.bitfield.masks.next_data_0_bit[self.byte as usize]; - - while free == -1 { - pos += 1; - self.byte = self.bitfield.data.get_byte(pos as usize); - free = self.bitfield.masks.next_data_0_bit[self.byte as usize]; - - if free == -1 { - pos = self.skip_ahead(pos)?; - - self.byte = self.bitfield.data.get_byte(pos as usize); - free = self.bitfield.masks.next_data_0_bit[self.byte as usize]; - } - } - self.pos = Some(pos); - - self.byte |= self.bitfield.masks.data_iterate[free as usize + 1]; - - let n = 8 * pos + free as u64; - if n < self.end { - Some(n) - } else { - None - } - } - - pub fn skip_ahead(&mut self, start: u64) -> Option { - let bitfield_index = &self.bitfield.index; - let tree_end = self.index_end; - let iter = &mut self.bitfield.iterator; - let o = start & 3; - - iter.seek(2 * (start / 4)); - - let mut tree_byte = bitfield_index.get_byte(iter.index() as usize) - | self.bitfield.masks.index_iterate[o as usize]; - - 
while self.bitfield.masks.next_index_0_bit[tree_byte as usize] == -1 { - if iter.is_left() { - iter.next(); - } else { - iter.next(); - iter.parent(); - } - - if right_span(iter) >= tree_end { - while right_span(iter) >= tree_end && is_parent(iter) { - iter.left_child(); - } - if right_span(iter) >= tree_end { - return None; - } - } - - tree_byte = bitfield_index.get_byte(iter.index() as usize); - } - - while iter.factor() > 2 { - if self.bitfield.masks.next_index_0_bit[tree_byte as usize] < 2 { - iter.left_child(); - } else { - iter.right_child(); - } - - tree_byte = bitfield_index.get_byte(iter.index() as usize); - } - - let mut free = self.bitfield.masks.next_index_0_bit[tree_byte as usize]; - if free == -1 { - free = 4; - } - - let next = iter.index() * 2 + free as u64; - - if next <= start { - Some(start + 1) - } else { - Some(next) - } - } -} - -fn right_span(iter: &flat_tree::Iterator) -> u64 { - iter.index() + iter.factor() / 2 - 1 -} - -fn is_parent(iter: &flat_tree::Iterator) -> bool { - iter.index() & 1 == 1 -} diff --git a/src/bitfield/masks.rs b/src/bitfield/masks.rs deleted file mode 100644 index 2f9f0704..00000000 --- a/src/bitfield/masks.rs +++ /dev/null @@ -1,108 +0,0 @@ -//! Masks used to determine how to update bytes. -//! -//! This piece of code is still a bit unclear; lots of magic numbers. It'd be -//! good to figure out what things mean. - -#[derive(Debug)] -pub(super) struct Masks { - pub index_update: Vec, - pub index_iterate: Vec, - pub data_iterate: Vec, - pub data_update: Vec, - pub map_parent_right: Vec, - pub map_parent_left: Vec, - pub next_data_0_bit: Vec, - pub next_index_0_bit: Vec, - pub total_1_bits: Vec, -} - -// Masks are underscored at every 8 bytes. 
-impl Masks { - pub fn new() -> Self { - let index_update = vec![ - 0b00_11_11_11, // 63 - 0b11_00_11_11, // 207 - 0b11_11_00_11, // 243 - 0b11_11_11_00, // 252 - ]; - - let index_iterate = vec![ - 0b00_00_00_00, // 0 - 0b11_00_00_00, // 192 - 0b11_11_00_00, // 240 - 0b11_11_11_00, // 252 - ]; - - let data_iterate = vec![ - 0b00_00_00_00, // 0 - 0b10_00_00_00, // 128 - 0b11_00_00_00, // 192 - 0b11_10_00_00, // 224 - 0b11_11_00_00, // 240 - 0b11_11_10_00, // 248 - 0b11_11_11_00, // 252 - 0b11_11_11_10, // 254 - 0b11_11_11_11, // 255 - ]; - - let data_update = vec![ - 0b01_11_11_11, // 127 - 0b10_11_11_11, // 191 - 0b11_01_11_11, // 223 - 0b11_10_11_11, // 239 - 0b11_11_01_11, // 247 - 0b11_11_10_11, // 251 - 0b11_11_11_01, // 253 - 0b11_11_11_10, // 254 - ]; - - let mut map_parent_right = vec![0; 256]; - let mut map_parent_left = vec![0; 256]; - - for i in 0..256 { - let a = (i & (15 << 4)) >> 4; - let b = i & 15; - - let left = if a == 15 { - 3 - } else if a == 0 { - 0 - } else { - 1 - }; - - let right = if b == 15 { - 3 - } else if b == 0 { - 0 - } else { - 1 - }; - - map_parent_right[i] = left | right; - map_parent_left[i] = map_parent_right[i] << 4; - } - - let total_1_bits: Vec<_> = (0..256).map(|n| (n as u8).count_ones() as u8).collect(); - - let mut next_data_0_bit: Vec<_> = (0..256) - .map(|n| (!n as u8).leading_zeros() as i16) - .collect(); - next_data_0_bit[255] = -1; - - let mut next_index_0_bit: Vec<_> = next_data_0_bit.iter().map(|n| n / 2).collect(); - next_index_0_bit[255] = -1; - - Self { - index_update, - index_iterate, - data_iterate, - data_update, - map_parent_right, - map_parent_left, - next_data_0_bit, - next_index_0_bit, - total_1_bits, - } - } -} diff --git a/src/bitfield/mod.rs b/src/bitfield/mod.rs index b261c2a6..9daa246c 100644 --- a/src/bitfield/mod.rs +++ b/src/bitfield/mod.rs @@ -1,377 +1,4 @@ -//! Bitfield module. Exposes `{data, tree, index}` internally. Serializable to -//! disk. -//! -//! TODO(yw): Document the magic mask format. 
(Will help to look at binary -//! versions of the numbers). -//! -//! TODO(yw): Document the use cases for this module, especially when opposed to -//! `sparse_bitfield`. -//! -//! NOTE(yw): in the JavaScript version, this code uses a single pager under the -//! hood. Because of Rust's borrow rules, that would be tricky to pull off for -//! us. So instead we've chosen to create three separate instances, with three -//! separate pagers powering it. -//! This means that when serializing to disk, we need to weave the contents of -//! all three of the pagers into a single instance. And when restoring it from -//! disk, we must do so again. -//! We need to make sure the performance impact of this stays well within -//! bounds. +mod dynamic; +mod fixed; -mod iterator; -mod masks; - -use self::masks::Masks; -use flat_tree::{self, Iterator as FlatIterator}; -pub use sparse_bitfield::{Bitfield as SparseBitfield, Change}; -use std::ops::Range; - -/// Bitfield with `{data, tree, index} fields.` -#[derive(Debug)] -pub struct Bitfield { - data: SparseBitfield, - index: SparseBitfield, - page_len: u64, - length: u64, - masks: Masks, - iterator: FlatIterator, -} - -impl Bitfield { - /// Create a new instance. - pub fn new() -> (Self, SparseBitfield) { - let s = Self { - data: SparseBitfield::new(1024), - index: SparseBitfield::new(256), - page_len: 3328, - length: 0, - masks: Masks::new(), - iterator: FlatIterator::new(0), - }; - (s, SparseBitfield::new(2048)) - } - - /// Create new instance from byteslice - pub fn from_slice(slice: &[u8]) -> (Self, SparseBitfield) { - // khodzha: - // slice is packed as data|tree|index|data|tree|index|... 
- // so for each 1024 + 2048 + 256 bytes - // we extract first 1024 bytes to data - // then next 2048 bytes to tree - // then next 256 bytes to index - let mut data = SparseBitfield::new(1024); - let mut tree = SparseBitfield::new(2048); - let mut index = SparseBitfield::new(256); - slice - .chunks_exact(1024 + 2048 + 256) - .enumerate() - .for_each(|(page_idx, chunk)| { - chunk.iter().enumerate().for_each(|(idx, byte)| { - if idx < 1024 { - data.set_byte(page_idx * 1024 + idx, *byte); - } else if idx < 1024 + 2048 { - tree.set_byte(page_idx * 1024 + (idx - 1024), *byte); - } else { - index.set_byte(page_idx * 1024 + (idx - 1024 - 2048), *byte); - } - }); - }); - let length = data.len() as u64; - let s = Self { - data, - index, - length, - page_len: 3328, - masks: Masks::new(), - iterator: FlatIterator::new(0), - }; - - (s, tree) - } - - /// Convert to vec - pub fn to_bytes(&self, tree: &tree_index::TreeIndex) -> std::io::Result> { - let tree = tree.as_bitfield(); - let data_bytes = self.data.to_bytes()?; - let tree_bytes = tree.to_bytes()?; - let index_bytes = self.index.to_bytes()?; - - let max_pages_len = std::cmp::max( - std::cmp::max(self.data.page_len(), tree.page_len()), - self.index.page_len(), - ); - - let data_ps = self.data.page_size(); - let tree_ps = tree.page_size(); - let index_ps = self.index.page_size(); - - let total_ps = data_ps + tree_ps + index_ps; - - let mut vec = Vec::with_capacity(max_pages_len * total_ps); - - for i in 0..max_pages_len { - extend_buf_from_slice(&mut vec, &data_bytes, i, data_ps); - extend_buf_from_slice(&mut vec, &tree_bytes, i, tree_ps); - extend_buf_from_slice(&mut vec, &index_bytes, i, index_ps); - } - - Ok(vec) - } - - /// Get the current length - pub fn len(&self) -> u64 { - self.length - } - - /// Returns `true` if the bitfield is empty - pub fn is_empty(&self) -> bool { - self.length == 0 - } - - /// Set a value at an index. 
- pub fn set(&mut self, index: u64, value: bool) -> Change { - let o = mask_8b(index); - let index = (index - o) / 8; - - let value = if value { - self.data.get_byte(index as usize) | 128 >> o - } else { - self.data.get_byte(index as usize) & self.masks.data_update[o as usize] - }; - - if self.data.set_byte(index as usize, value).is_unchanged() { - return Change::Unchanged; - } - - self.length = self.data.len() as u64; - self.set_index(index, value); - Change::Changed - } - - /// Get a value at a position in the bitfield. - pub fn get(&mut self, index: u64) -> bool { - self.data.get(index as usize) - } - - /// Calculate the total for the whole data. - pub fn total(&mut self) -> u8 { - let len = self.data.len() as u64; - self.total_with_range(0..len) - } - - /// Calculate the total of ... TODO(yw) - pub fn total_with_start(&mut self, start: u64) -> u8 { - let len = self.data.len() as u64; - self.total_with_range(start..len) - } - - /// Calculate the total of ... TODO(yw) - pub fn total_with_range(&mut self, range: Range) -> u8 { - let start = range.start; - let end = range.end; - - if end < start { - return 0; - } - - if end > self.data.len() as u64 { - self.expand(end); - } - - let o = mask_8b(start); - let e = mask_8b(end); - - let pos = (start - o) / 8; - let last = (end - e) / 8; - - let left_mask = 255 - self.masks.data_iterate[o as usize]; - let right_mask = self.masks.data_iterate[e as usize]; - - let byte = self.data.get_byte(pos as usize); - if pos == last { - let index = (byte & left_mask & right_mask) as u64; - return self.masks.total_1_bits[index as usize]; - } - let index = (byte & left_mask) as u64; - let mut total = self.masks.total_1_bits[index as usize]; - - for i in pos + 1..last { - let index = self.data.get_byte(i as usize) as u64; - total += self.masks.total_1_bits[index as usize]; - } - - let index: u64 = self.data.get_byte(last as usize) as u64 & right_mask as u64; - total + self.masks.total_1_bits[index as usize] - } - - /// Set a value at 
index. - /// - ///```txt - /// (a + b | c + d | e + f | g + h) - /// -> (a | b | c | d) (e | f | g | h) - ///``` - /// - /// NOTE(yw): lots of magic values going on; I have no idea what we're doing - /// here. - fn set_index(&mut self, mut index: u64, value: u8) -> Change { - let o = index & 3; - index = (index - o) / 4; - - let start = tree_index(index); - - let left = self.index.get_byte(start as usize) & self.masks.index_update[o as usize]; - let right = get_index_value(value) >> tree_index(o); - let mut byte = left | right; - let len = self.index.len(); - let max_len = self.data.page_len() * 256; - - self.iterator.seek(start); - - while self.iterator.index() < max_len as u64 - && self - .index - .set_byte(self.iterator.index() as usize, byte) - .is_changed() - { - if self.iterator.is_left() { - let index: u64 = self.index.get_byte(self.iterator.sibling() as usize).into(); - byte = self.masks.map_parent_left[byte as usize] - | self.masks.map_parent_right[index as usize]; - } else { - let index: u64 = self - .index - .get_byte(self.iterator.sibling() as usize) // FIXME: out of bounds read - .into(); - byte = self.masks.map_parent_right[byte as usize] - | self.masks.map_parent_left[index as usize]; - } - self.iterator.parent(); - } - - if len != self.index.len() { - self.expand(len as u64); - } - - if self.iterator.index() == start { - Change::Unchanged - } else { - Change::Changed - } - } - - fn expand(&mut self, len: u64) { - let mut roots = vec![]; // FIXME: alloc. 
- flat_tree::full_roots(tree_index(len), &mut roots); - let bf = &mut self.index; - let ite = &mut self.iterator; - let masks = &self.masks; - let mut byte; - - for root in roots { - ite.seek(root); - byte = bf.get_byte(ite.index() as usize); - - loop { - if ite.is_left() { - let index = bf.get_byte(ite.sibling() as usize) as u64; - byte = masks.map_parent_left[byte as usize] - | masks.map_parent_right[index as usize]; - } else { - let index = bf.get_byte(ite.sibling() as usize) as u64; - byte = masks.map_parent_right[byte as usize] - | masks.map_parent_left[index as usize]; - } - - if set_byte_no_alloc(bf, ite.parent(), byte).is_unchanged() { - break; - } - } - } - } - - // TODO: use the index to speed this up *a lot* - /// https://github.com/mafintosh/hypercore/blob/06f3a1f573cb74ee8cfab2742455318fbf7cc3a2/lib/bitfield.js#L111-L126 - pub fn compress(&self, start: usize, length: usize) -> std::io::Result> { - // On Node versions this fields might not be present on the want/request message - // When both start and length are not present (!0 in node is false), return all data bytes encoded - if start == 0 && length == 0 { - return Ok(bitfield_rle::encode(&self.data.to_bytes()?)); - } - - use std::io::{Cursor, Write}; - let mut buf = Cursor::new(Vec::with_capacity(length)); - - let page_size = self.data.page_size() as f64; - let mut p = start as f64 / page_size / 8.0; - let end = p + length as f64 / page_size / 8.0; - let offset = p * page_size; - - while p < end { - let index = p as usize; - let page = self.data.pages.get(index); - if let Some(page) = page { - if page.len() != 0 { - buf.set_position((p * page_size - offset) as u64); - buf.write_all(&page)?; - } - } - p += 1.0; - } - - Ok(bitfield_rle::encode(&buf.into_inner())) - } - - /// Constructs an iterator from start to end - pub fn iterator(&mut self) -> iterator::Iterator<'_> { - let len = self.length; - self.iterator_with_range(0, len) - } - - /// Constructs an iterator from `start` to `end` - pub fn 
iterator_with_range(&mut self, start: u64, end: u64) -> iterator::Iterator<'_> { - let mut iter = iterator::Iterator::new(self); - iter.range(start, end); - iter.seek(0); - - iter - } -} - -// NOTE: can we move this into `sparse_bitfield`? -fn set_byte_no_alloc(bf: &mut SparseBitfield, index: u64, byte: u8) -> Change { - if 8 * index >= bf.len() as u64 { - return Change::Unchanged; - } - bf.set_byte(index as usize, byte) -} - -#[inline] -fn get_index_value(index: u8) -> u8 { - match index { - 255 => 192, - 0 => 0, - _ => 64, - } -} - -#[inline] -fn mask_8b(num: u64) -> u64 { - num & 7 -} - -/// Convert the index to the index in the tree. -#[inline] -fn tree_index(index: u64) -> u64 { - 2 * index -} - -// copies slice to buf or fills buf with len-of-slice zeros -fn extend_buf_from_slice(buf: &mut Vec, bytes: &[u8], i: usize, pagesize: usize) { - if i * pagesize >= bytes.len() { - for _ in 0..pagesize { - buf.push(0); - } - } else { - let range = (i * pagesize)..((i + 1) * pagesize); - buf.extend_from_slice(&bytes[range]); - } -} +pub(crate) use dynamic::DynamicBitfield as Bitfield; diff --git a/src/builder.rs b/src/builder.rs new file mode 100644 index 00000000..4e18dad2 --- /dev/null +++ b/src/builder.rs @@ -0,0 +1,100 @@ +use random_access_storage::RandomAccess; +use std::fmt::Debug; +#[cfg(feature = "cache")] +use std::time::Duration; +use tracing::instrument; + +#[cfg(feature = "cache")] +use crate::common::cache::CacheOptions; +use crate::{core::HypercoreOptions, Hypercore, HypercoreError, PartialKeypair, Storage}; + +/// Build CacheOptions. +#[cfg(feature = "cache")] +#[derive(Debug)] +pub struct CacheOptionsBuilder(CacheOptions); + +#[cfg(feature = "cache")] +impl Default for CacheOptionsBuilder { + fn default() -> Self { + Self::new() + } +} + +#[cfg(feature = "cache")] +impl CacheOptionsBuilder { + /// Create a CacheOptions builder with default options + pub fn new() -> Self { + Self(CacheOptions::new()) + } + + /// Set cache time to live. 
+ pub fn time_to_live(mut self, time_to_live: Duration) -> Self { + self.0.time_to_live = Some(time_to_live); + self + } + + /// Set cache time to idle. + pub fn time_to_idle(mut self, time_to_idle: Duration) -> Self { + self.0.time_to_idle = Some(time_to_idle); + self + } + + /// Set cache max capacity in bytes. + pub fn max_capacity(mut self, max_capacity: u64) -> Self { + self.0.max_capacity = Some(max_capacity); + self + } + + /// Build new cache options. + pub(crate) fn build(self) -> CacheOptions { + self.0 + } +} + +/// Build a Hypercore instance with options. +#[derive(Debug)] +pub struct HypercoreBuilder +where + T: RandomAccess + Debug + Send, +{ + storage: Storage, + options: HypercoreOptions, +} + +impl HypercoreBuilder +where + T: RandomAccess + Debug + Send, +{ + /// Create a hypercore builder with a given storage + pub fn new(storage: Storage) -> Self { + Self { + storage, + options: HypercoreOptions::new(), + } + } + + /// Set key pair. + pub fn key_pair(mut self, key_pair: PartialKeypair) -> Self { + self.options.key_pair = Some(key_pair); + self + } + + /// Set open. + pub fn open(mut self, open: bool) -> Self { + self.options.open = open; + self + } + + /// Set node cache options. + #[cfg(feature = "cache")] + pub fn node_cache_options(mut self, builder: CacheOptionsBuilder) -> Self { + self.options.node_cache_options = Some(builder.build()); + self + } + + /// Build a new Hypercore. 
+ #[instrument(err, skip_all)] + pub async fn build(self) -> Result, HypercoreError> { + Hypercore::new(self.storage, self.options).await + } +} diff --git a/src/common/cache.rs b/src/common/cache.rs new file mode 100644 index 00000000..fc6a4961 --- /dev/null +++ b/src/common/cache.rs @@ -0,0 +1,58 @@ +use moka::sync::Cache; +use std::time::Duration; + +use crate::Node; + +// Default to 1 year of cache +const DEFAULT_CACHE_TTL_SEC: u64 = 31556952; +const DEFAULT_CACHE_TTI_SEC: u64 = 31556952; +// Default to 100kb of node cache +const DEFAULT_CACHE_MAX_SIZE: u64 = 100000; +const NODE_WEIGHT: u32 = + // Byte size of a Node based on the fields. + 3 * 8 + 32 + 4 + + // Then 8 for key and guesstimate 8 bytes of overhead. + 8 + 8; + +#[derive(Debug, Clone)] +pub(crate) struct CacheOptions { + pub(crate) time_to_live: Option, + pub(crate) time_to_idle: Option, + pub(crate) max_capacity: Option, +} + +impl CacheOptions { + pub(crate) fn new() -> Self { + Self { + time_to_live: None, + time_to_idle: None, + max_capacity: None, + } + } + + pub(crate) fn to_node_cache(&self, initial_nodes: Vec) -> Cache { + let cache = if self.time_to_live.is_some() || self.time_to_idle.is_some() { + Cache::builder() + .time_to_live( + self.time_to_live + .unwrap_or_else(|| Duration::from_secs(DEFAULT_CACHE_TTL_SEC)), + ) + .time_to_idle( + self.time_to_idle + .unwrap_or_else(|| Duration::from_secs(DEFAULT_CACHE_TTI_SEC)), + ) + .max_capacity(self.max_capacity.unwrap_or(DEFAULT_CACHE_MAX_SIZE)) + .weigher(|_, _| NODE_WEIGHT) + .build() + } else { + Cache::builder() + .max_capacity(self.max_capacity.unwrap_or(DEFAULT_CACHE_MAX_SIZE)) + .weigher(|_, _| NODE_WEIGHT) + .build() + }; + for node in initial_nodes { + cache.insert(node.index, node); + } + cache + } +} diff --git a/src/common/error.rs b/src/common/error.rs new file mode 100644 index 00000000..89ec0b37 --- /dev/null +++ b/src/common/error.rs @@ -0,0 +1,78 @@ +use compact_encoding::EncodingError; +use thiserror::Error; + +use 
crate::Store; + +/// Common error type for the hypercore interface +#[derive(Error, Debug)] +pub enum HypercoreError { + /// Bad argument + #[error("Bad argument. {context}")] + BadArgument { + /// Context for the error + context: String, + }, + /// Not writable + #[error("Hypercore not writable")] + NotWritable, + /// Invalid signature + #[error("Given signature was invalid. {context}")] + InvalidSignature { + /// Context for the error + context: String, + }, + /// Invalid checksum + #[error("Invalid checksum. {context}")] + InvalidChecksum { + /// Context for the error + context: String, + }, + /// Empty storage + #[error("Empty storage: {store}.")] + EmptyStorage { + /// Store that was found empty + store: Store, + }, + /// Corrupt storage + #[error("Corrupt storage: {store}.{}", + .context.as_ref().map_or_else(String::new, |ctx| format!(" Context: {ctx}.")))] + CorruptStorage { + /// Store that was corrupt + store: Store, + /// Context for the error + context: Option, + }, + /// Invalid operation + #[error("Invalid operation. 
{context}")] + InvalidOperation { + /// Context for the error + context: String, + }, + /// Unexpected IO error occured + #[error("Unrecoverable input/output error occured.{}", + .context.as_ref().map_or_else(String::new, |ctx| format!(" {ctx}.")))] + IO { + /// Context for the error + context: Option, + /// Original source error + #[source] + source: std::io::Error, + }, +} + +impl From for HypercoreError { + fn from(err: std::io::Error) -> Self { + Self::IO { + context: None, + source: err, + } + } +} + +impl From for HypercoreError { + fn from(err: EncodingError) -> Self { + Self::InvalidOperation { + context: format!("Encoding failed: {err}"), + } + } +} diff --git a/src/common/mod.rs b/src/common/mod.rs new file mode 100644 index 00000000..f5fb6baf --- /dev/null +++ b/src/common/mod.rs @@ -0,0 +1,23 @@ +#[cfg(feature = "cache")] +pub(crate) mod cache; +mod error; +mod node; +mod peer; +mod store; + +pub use self::error::HypercoreError; +pub use self::node::Node; +pub(crate) use self::node::NodeByteRange; +pub(crate) use self::peer::ValuelessProof; +pub use self::peer::{ + DataBlock, DataHash, DataSeek, DataUpgrade, Proof, RequestBlock, RequestSeek, RequestUpgrade, +}; +pub use self::store::Store; +pub(crate) use self::store::{StoreInfo, StoreInfoInstruction, StoreInfoType}; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct BitfieldUpdate { + pub(crate) drop: bool, + pub(crate) start: u64, + pub(crate) length: u64, +} diff --git a/src/storage/node.rs b/src/common/node.rs similarity index 65% rename from src/storage/node.rs rename to src/common/node.rs index ba1fc416..7e339d37 100644 --- a/src/storage/node.rs +++ b/src/common/node.rs @@ -1,16 +1,19 @@ -use anyhow::ensure; -use anyhow::Result; -use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; use merkle_tree_stream::Node as NodeTrait; use merkle_tree_stream::{NodeKind, NodeParts}; use pretty_hash::fmt as pretty_fmt; use std::cmp::Ordering; use std::convert::AsRef; use std::fmt::{self, Display}; -use 
std::io::Cursor; use crate::crypto::Hash; +/// Node byte range +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) struct NodeByteRange { + pub(crate) index: u64, + pub(crate) length: u64, +} + /// Nodes that are persisted to disk. // TODO: replace `hash: Vec` with `hash: Hash`. This requires patching / // rewriting the Blake2b crate to support `.from_bytes()` to serialize from @@ -22,53 +25,40 @@ pub struct Node { pub(crate) length: u64, pub(crate) parent: u64, pub(crate) data: Option>, + pub(crate) blank: bool, } impl Node { /// Create a new instance. // TODO: ensure sizes are correct. pub fn new(index: u64, hash: Vec, length: u64) -> Self { + let mut blank = true; + for byte in &hash { + if *byte != 0 { + blank = false; + break; + } + } Self { index, hash, - length: length as u64, + length, parent: flat_tree::parent(index), data: Some(Vec::with_capacity(0)), + blank, } } - /// Convert a vector to a new instance. - /// - /// Requires the index at which the buffer was read to be passed. - pub fn from_bytes(index: u64, buffer: &[u8]) -> Result { - ensure!(buffer.len() == 40, "buffer should be 40 bytes"); - - let parent = flat_tree::parent(index); - let mut reader = Cursor::new(buffer); - - // TODO: subslice directly, move cursor forward. - let capacity = 32; - let mut hash = Vec::with_capacity(capacity); - for _ in 0..capacity { - hash.push(reader.read_u8()?); - } - - let length = reader.read_u64::()?; - Ok(Self { - hash, - length, + /// Creates a new blank node + pub fn new_blank(index: u64) -> Self { + Self { index, - parent, - data: Some(Vec::with_capacity(0)), - }) - } - - /// Convert to a buffer that can be written to disk. 
- pub fn to_bytes(&self) -> Result> { - let mut writer = Vec::with_capacity(40); - writer.extend_from_slice(&self.hash); - writer.write_u64::(self.length as u64)?; - Ok(writer) + hash: vec![0, 32], + length: 0, + parent: 0, + data: None, + blank: true, + } } } @@ -85,7 +75,7 @@ impl NodeTrait for Node { #[inline] fn len(&self) -> u64 { - self.length as u64 + self.length } #[inline] @@ -137,13 +127,22 @@ impl From> for Node { NodeKind::Leaf(data) => Some(data.clone()), NodeKind::Parent => None, }; + let hash: Vec = parts.hash().as_bytes().into(); + let mut blank = true; + for byte in &hash { + if *byte != 0 { + blank = false; + break; + } + } Node { index: partial.index(), parent: partial.parent, - length: partial.len() as u64, - hash: parts.hash().as_bytes().into(), + length: partial.len(), + hash, data, + blank, } } } diff --git a/src/common/peer.rs b/src/common/peer.rs new file mode 100644 index 00000000..c71b9818 --- /dev/null +++ b/src/common/peer.rs @@ -0,0 +1,117 @@ +//! Types needed for passing information with with peers. +//! hypercore-protocol-rs uses these types and wraps them +//! into wire messages. +use crate::Node; + +#[derive(Debug, Clone, PartialEq)] +/// Request of a DataBlock or DataHash from peer +pub struct RequestBlock { + /// Hypercore index + pub index: u64, + /// TODO: document + pub nodes: u64, +} + +#[derive(Debug, Clone, PartialEq)] +/// Request of a DataSeek from peer +pub struct RequestSeek { + /// TODO: document + pub bytes: u64, +} + +#[derive(Debug, Clone, PartialEq)] +/// Request of a DataUpgrade from peer +pub struct RequestUpgrade { + /// Hypercore start index + pub start: u64, + /// Length of elements + pub length: u64, +} + +#[derive(Debug, Clone, PartialEq)] +/// Proof generated from corresponding requests +pub struct Proof { + /// Fork + pub fork: u64, + /// Data block. 
+ pub block: Option, + /// Data hash + pub hash: Option, + /// Data seek + pub seek: Option, + /// Data updrade + pub upgrade: Option, +} + +#[derive(Debug, Clone, PartialEq)] +/// Valueless proof generated from corresponding requests +pub(crate) struct ValuelessProof { + pub(crate) fork: u64, + /// Data block. NB: The ValuelessProof struct uses the Hash type because + /// the stored binary value is processed externally to the proof. + pub(crate) block: Option, + pub(crate) hash: Option, + pub(crate) seek: Option, + pub(crate) upgrade: Option, +} + +impl ValuelessProof { + pub(crate) fn into_proof(mut self, block_value: Option>) -> Proof { + let block = self.block.take().map(|block| DataBlock { + index: block.index, + nodes: block.nodes, + value: block_value.expect("Data block needs to be given"), + }); + Proof { + fork: self.fork, + block, + hash: self.hash.take(), + seek: self.seek.take(), + upgrade: self.upgrade.take(), + } + } +} + +#[derive(Debug, Clone, PartialEq)] +/// Block of data to peer +pub struct DataBlock { + /// Hypercore index + pub index: u64, + /// Data block value in bytes + pub value: Vec, + /// TODO: document + pub nodes: Vec, +} + +#[derive(Debug, Clone, PartialEq)] +/// Data hash to peer +pub struct DataHash { + /// Hypercore index + pub index: u64, + /// TODO: document + pub nodes: Vec, +} + +#[derive(Debug, Clone, PartialEq)] +/// TODO: Document +pub struct DataSeek { + /// TODO: Document + pub bytes: u64, + /// TODO: Document + pub nodes: Vec, +} + +#[derive(Debug, Clone, PartialEq)] +/// TODO: Document +pub struct DataUpgrade { + /// TODO: Document + pub start: u64, + /// TODO: Document + pub length: u64, + /// TODO: Document + pub nodes: Vec, + /// TODO: Document + pub additional_nodes: Vec, + /// TODO: Document + pub signature: Vec, +} diff --git a/src/common/store.rs b/src/common/store.rs new file mode 100644 index 00000000..357ebc03 --- /dev/null +++ b/src/common/store.rs @@ -0,0 +1,155 @@ +/// The types of stores that can be created. 
+#[derive(Debug, Clone, PartialEq)] +pub enum Store { + /// Tree + Tree, + /// Data (block store) + Data, + /// Bitfield + Bitfield, + /// Oplog + Oplog, +} + +impl std::fmt::Display for Store { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Store::Tree => write!(f, "tree"), + Store::Data => write!(f, "data"), + Store::Bitfield => write!(f, "bitfield"), + Store::Oplog => write!(f, "oplog"), + } + } +} + +/// Information type about a store. +#[derive(Debug, PartialEq)] +pub(crate) enum StoreInfoType { + /// Read/write content of the store + Content, + /// Size in bytes of the store. When flushed, truncates to the given index. `data` is `None`. + Size, +} + +/// Piece of information about a store. Useful for indicating changes that should be made to random +/// access storages or information read from them. +#[derive(Debug)] +pub(crate) struct StoreInfo { + pub(crate) store: Store, + pub(crate) info_type: StoreInfoType, + pub(crate) index: u64, + pub(crate) length: Option, + pub(crate) data: Option>, + /// When reading, indicates missing value (can be true only if allow_miss is given as instruction). + /// When writing indicates that the value should be dropped. 
+ pub(crate) miss: bool, +} + +impl StoreInfo { + pub(crate) fn new_content(store: Store, index: u64, data: &[u8]) -> Self { + Self { + store, + info_type: StoreInfoType::Content, + index, + length: Some(data.len() as u64), + data: Some(data.into()), + miss: false, + } + } + + pub(crate) fn new_content_miss(store: Store, index: u64) -> Self { + Self { + store, + info_type: StoreInfoType::Content, + index, + length: None, + data: None, + miss: true, + } + } + + pub(crate) fn new_delete(store: Store, index: u64, length: u64) -> Self { + Self { + store, + info_type: StoreInfoType::Content, + index, + length: Some(length), + data: None, + miss: true, + } + } + + pub(crate) fn new_truncate(store: Store, index: u64) -> Self { + Self { + store, + info_type: StoreInfoType::Size, + index, + length: None, + data: None, + miss: true, + } + } + + pub(crate) fn new_size(store: Store, index: u64, length: u64) -> Self { + Self { + store, + info_type: StoreInfoType::Size, + index, + length: Some(length), + data: None, + miss: false, + } + } +} + +/// Represents an instruction to obtain information about a store. 
+#[derive(Debug)] +pub(crate) struct StoreInfoInstruction { + pub(crate) store: Store, + pub(crate) info_type: StoreInfoType, + pub(crate) index: u64, + pub(crate) length: Option, + pub(crate) allow_miss: bool, +} + +impl StoreInfoInstruction { + pub(crate) fn new_content(store: Store, index: u64, length: u64) -> Self { + Self { + store, + info_type: StoreInfoType::Content, + index, + length: Some(length), + allow_miss: false, + } + } + + pub(crate) fn new_content_allow_miss(store: Store, index: u64, length: u64) -> Self { + Self { + store, + info_type: StoreInfoType::Content, + index, + length: Some(length), + allow_miss: true, + } + } + + pub(crate) fn new_all_content(store: Store) -> Self { + Self { + store, + info_type: StoreInfoType::Content, + index: 0, + length: None, + allow_miss: false, + } + } + + pub(crate) fn new_size(store: Store, index: u64) -> Self { + Self { + store, + info_type: StoreInfoType::Size, + index, + length: None, + allow_miss: false, + } + } +} diff --git a/src/core.rs b/src/core.rs new file mode 100644 index 00000000..fe49e9a2 --- /dev/null +++ b/src/core.rs @@ -0,0 +1,1136 @@ +//! Hypercore's main abstraction. Exposes an append-only, secure log structure. 
+use ed25519_dalek::Signature; +use futures::future::Either; +use random_access_storage::RandomAccess; +use std::convert::TryFrom; +use std::fmt::Debug; +use tracing::instrument; + +#[cfg(feature = "cache")] +use crate::common::cache::CacheOptions; +use crate::{ + bitfield::Bitfield, + common::{BitfieldUpdate, HypercoreError, NodeByteRange, Proof, StoreInfo, ValuelessProof}, + crypto::{generate_signing_key, PartialKeypair}, + data::BlockStore, + oplog::{Header, Oplog, MAX_OPLOG_ENTRIES_BYTE_SIZE}, + storage::Storage, + tree::{MerkleTree, MerkleTreeChangeset}, + RequestBlock, RequestSeek, RequestUpgrade, +}; + +#[derive(Debug)] +pub(crate) struct HypercoreOptions { + pub(crate) key_pair: Option, + pub(crate) open: bool, + #[cfg(feature = "cache")] + pub(crate) node_cache_options: Option, +} + +impl HypercoreOptions { + pub(crate) fn new() -> Self { + Self { + key_pair: None, + open: false, + #[cfg(feature = "cache")] + node_cache_options: None, + } + } +} + +/// Hypercore is an append-only log structure. +#[derive(Debug)] +pub struct Hypercore +where + T: RandomAccess + Debug, +{ + pub(crate) key_pair: PartialKeypair, + pub(crate) storage: Storage, + pub(crate) oplog: Oplog, + pub(crate) tree: MerkleTree, + pub(crate) block_store: BlockStore, + pub(crate) bitfield: Bitfield, + skip_flush_count: u8, // autoFlush in Javascript + header: Header, +} + +/// Response from append, matches that of the Javascript result +#[derive(Debug)] +pub struct AppendOutcome { + /// Length of the hypercore after append + pub length: u64, + /// Byte length of the hypercore after append + pub byte_length: u64, +} + +/// Info about the hypercore +#[derive(Debug)] +pub struct Info { + /// Length of the hypercore + pub length: u64, + /// Byte length of the hypercore + pub byte_length: u64, + /// Continuous length of entries in the hypercore with data + /// starting from index 0 + pub contiguous_length: u64, + /// Fork index. 0 if hypercore not forked. 
+ pub fork: u64, + /// True if hypercore is writeable, false if read-only + pub writeable: bool, +} + +impl Hypercore +where + T: RandomAccess + Debug + Send, +{ + /// Creates/opens new hypercore using given storage and options + pub(crate) async fn new( + mut storage: Storage, + mut options: HypercoreOptions, + ) -> Result, HypercoreError> { + let key_pair: Option = if options.open { + if options.key_pair.is_some() { + return Err(HypercoreError::BadArgument { + context: "Key pair can not be used when building an openable hypercore" + .to_string(), + }); + } + None + } else { + Some(options.key_pair.take().unwrap_or_else(|| { + let signing_key = generate_signing_key(); + PartialKeypair { + public: signing_key.verifying_key(), + secret: Some(signing_key), + } + })) + }; + + // Open/create oplog + let mut oplog_open_outcome = match Oplog::open(&key_pair, None)? { + Either::Right(value) => value, + Either::Left(instruction) => { + let info = storage.read_info(instruction).await?; + match Oplog::open(&key_pair, Some(info))? { + Either::Right(value) => value, + Either::Left(_) => { + return Err(HypercoreError::InvalidOperation { + context: "Could not open oplog".to_string(), + }); + } + } + } + }; + storage + .flush_infos(&oplog_open_outcome.infos_to_flush) + .await?; + + // Open/create tree + let mut tree = match MerkleTree::open( + &oplog_open_outcome.header.tree, + None, + #[cfg(feature = "cache")] + &options.node_cache_options, + )? { + Either::Right(value) => value, + Either::Left(instructions) => { + let infos = storage.read_infos(&instructions).await?; + match MerkleTree::open( + &oplog_open_outcome.header.tree, + Some(&infos), + #[cfg(feature = "cache")] + &options.node_cache_options, + )? 
{ + Either::Right(value) => value, + Either::Left(_) => { + return Err(HypercoreError::InvalidOperation { + context: "Could not open tree".to_string(), + }); + } + } + } + }; + + // Create block store instance + let block_store = BlockStore::default(); + + // Open bitfield + let mut bitfield = match Bitfield::open(None) { + Either::Right(value) => value, + Either::Left(instruction) => { + let info = storage.read_info(instruction).await?; + match Bitfield::open(Some(info)) { + Either::Right(value) => value, + Either::Left(instruction) => { + let info = storage.read_info(instruction).await?; + match Bitfield::open(Some(info)) { + Either::Right(value) => value, + Either::Left(_) => { + return Err(HypercoreError::InvalidOperation { + context: "Could not open bitfield".to_string(), + }); + } + } + } + } + } + }; + + // Process entries stored only to the oplog and not yet flushed into bitfield or tree + if let Some(entries) = oplog_open_outcome.entries { + for entry in entries.iter() { + for node in &entry.tree_nodes { + tree.add_node(node.clone()); + } + + if let Some(bitfield_update) = &entry.bitfield { + bitfield.update(bitfield_update); + update_contiguous_length( + &mut oplog_open_outcome.header, + &bitfield, + bitfield_update, + ); + } + if let Some(tree_upgrade) = &entry.tree_upgrade { + // TODO: Generalize Either response stack + let mut changeset = + match tree.truncate(tree_upgrade.length, tree_upgrade.fork, None)? { + Either::Right(value) => value, + Either::Left(instructions) => { + let infos = storage.read_infos(&instructions).await?; + match tree.truncate( + tree_upgrade.length, + tree_upgrade.fork, + Some(&infos), + )? 
{ + Either::Right(value) => value, + Either::Left(_) => { + return Err(HypercoreError::InvalidOperation { + context: format!( + "Could not truncate tree to length {}", + tree_upgrade.length + ), + }); + } + } + } + }; + changeset.ancestors = tree_upgrade.ancestors; + changeset.hash = Some(changeset.hash()); + changeset.signature = + Some(Signature::try_from(&*tree_upgrade.signature).map_err(|_| { + HypercoreError::InvalidSignature { + context: "Could not parse changeset signature".to_string(), + } + })?); + + // Update the header with this changeset to make in-memory value match that + // of the stored value. + oplog_open_outcome.oplog.update_header_with_changeset( + &changeset, + None, + &mut oplog_open_outcome.header, + )?; + + // TODO: Skip reorg hints for now, seems to only have to do with replication + // addReorgHint(header.hints.reorgs, tree, batch) + + // Commit changeset to in-memory tree + tree.commit(changeset)?; + } + } + } + + let oplog = oplog_open_outcome.oplog; + let header = oplog_open_outcome.header; + let key_pair = header.key_pair.clone(); + + Ok(Hypercore { + key_pair, + storage, + oplog, + tree, + block_store, + bitfield, + header, + skip_flush_count: 0, + }) + } + + /// Gets basic info about the Hypercore + pub fn info(&self) -> Info { + Info { + length: self.tree.length, + byte_length: self.tree.byte_length, + contiguous_length: self.header.hints.contiguous_length, + fork: self.tree.fork, + writeable: self.key_pair.secret.is_some(), + } + } + + /// Appends a data slice to the hypercore. + #[instrument(err, skip_all, fields(data_len = data.len()))] + pub async fn append(&mut self, data: &[u8]) -> Result { + self.append_batch(&[data]).await + } + + /// Appends a given batch of data slices to the hypercore. 
+ #[instrument(err, skip_all, fields(batch_len = batch.as_ref().len()))] + pub async fn append_batch, B: AsRef<[A]>>( + &mut self, + batch: B, + ) -> Result { + let secret_key = match &self.key_pair.secret { + Some(key) => key, + None => return Err(HypercoreError::NotWritable), + }; + + if !batch.as_ref().is_empty() { + // Create a changeset for the tree + let mut changeset = self.tree.changeset(); + let mut batch_length: usize = 0; + for data in batch.as_ref().iter() { + batch_length += changeset.append(data.as_ref()); + } + changeset.hash_and_sign(secret_key); + + // Write the received data to the block store + let info = + self.block_store + .append_batch(batch.as_ref(), batch_length, self.tree.byte_length); + self.storage.flush_info(info).await?; + + // Append the changeset to the Oplog + let bitfield_update = BitfieldUpdate { + drop: false, + start: changeset.ancestors, + length: changeset.batch_length, + }; + let outcome = self.oplog.append_changeset( + &changeset, + Some(bitfield_update.clone()), + false, + &self.header, + )?; + self.storage.flush_infos(&outcome.infos_to_flush).await?; + self.header = outcome.header; + + // Write to bitfield + self.bitfield.update(&bitfield_update); + + // Contiguous length is known only now + update_contiguous_length(&mut self.header, &self.bitfield, &bitfield_update); + + // Commit changeset to in-memory tree + self.tree.commit(changeset)?; + + // Now ready to flush + if self.should_flush_bitfield_and_tree_and_oplog() { + self.flush_bitfield_and_tree_and_oplog(false).await?; + } + } + + // Return the new value + Ok(AppendOutcome { + length: self.tree.length, + byte_length: self.tree.byte_length, + }) + } + + /// Read value at given index, if any. 
+ #[instrument(err, skip(self))] + pub async fn get(&mut self, index: u64) -> Result>, HypercoreError> { + if !self.bitfield.get(index) { + return Ok(None); + } + + let byte_range = self.byte_range(index, None).await?; + + // TODO: Generalize Either response stack + let data = match self.block_store.read(&byte_range, None) { + Either::Right(value) => value, + Either::Left(instruction) => { + let info = self.storage.read_info(instruction).await?; + match self.block_store.read(&byte_range, Some(info)) { + Either::Right(value) => value, + Either::Left(_) => { + return Err(HypercoreError::InvalidOperation { + context: "Could not read block storage range".to_string(), + }); + } + } + } + }; + + Ok(Some(data.to_vec())) + } + + /// Clear data for entries between start and end (exclusive) indexes. + #[instrument(err, skip(self))] + pub async fn clear(&mut self, start: u64, end: u64) -> Result<(), HypercoreError> { + if start >= end { + // NB: This is what javascript does, so we mimic that here + return Ok(()); + } + // Write to oplog + let infos_to_flush = self.oplog.clear(start, end)?; + self.storage.flush_infos(&infos_to_flush).await?; + + // Set bitfield + self.bitfield.set_range(start, end - start, false); + + // Set contiguous length + if start < self.header.hints.contiguous_length { + self.header.hints.contiguous_length = start; + } + + // Find the biggest hole that can be punched into the data + let start = if let Some(index) = self.bitfield.last_index_of(true, start) { + index + 1 + } else { + 0 + }; + let end = if let Some(index) = self.bitfield.index_of(true, end) { + index + } else { + self.tree.length + }; + + // Find byte offset for first value + let mut infos: Vec = Vec::new(); + let clear_offset = match self.tree.byte_offset(start, None)? { + Either::Right(value) => value, + Either::Left(instructions) => { + let new_infos = self.storage.read_infos_to_vec(&instructions).await?; + infos.extend(new_infos); + match self.tree.byte_offset(start, Some(&infos))? 
{ + Either::Right(value) => value, + Either::Left(_) => { + return Err(HypercoreError::InvalidOperation { + context: format!("Could not read offset for index {start} from tree"), + }); + } + } + } + }; + + // Find byte range for last value + let last_byte_range = self.byte_range(end - 1, Some(&infos)).await?; + + let clear_length = (last_byte_range.index + last_byte_range.length) - clear_offset; + + // Clear blocks + let info_to_flush = self.block_store.clear(clear_offset, clear_length); + self.storage.flush_info(info_to_flush).await?; + + // Now ready to flush + if self.should_flush_bitfield_and_tree_and_oplog() { + self.flush_bitfield_and_tree_and_oplog(false).await?; + } + + Ok(()) + } + + /// Access the key pair. + pub fn key_pair(&self) -> &PartialKeypair { + &self.key_pair + } + + /// Create a proof for given request + #[instrument(err, skip_all)] + pub async fn create_proof( + &mut self, + block: Option, + hash: Option, + seek: Option, + upgrade: Option, + ) -> Result, HypercoreError> { + let valueless_proof = self + .create_valueless_proof(block, hash, seek, upgrade) + .await?; + let value: Option> = if let Some(block) = valueless_proof.block.as_ref() { + let value = self.get(block.index).await?; + if value.is_none() { + // The data value requested in the proof can not be read, we return None here + // and let the party requesting figure out what to do. + return Ok(None); + } + value + } else { + None + }; + Ok(Some(valueless_proof.into_proof(value))) + } + + /// Verify and apply proof received from peer, returns true if changed, false if not + /// possible to apply. 
+ #[instrument(skip_all)] + pub async fn verify_and_apply_proof(&mut self, proof: &Proof) -> Result { + if proof.fork != self.tree.fork { + return Ok(false); + } + let changeset = self.verify_proof(proof).await?; + if !self.tree.commitable(&changeset) { + return Ok(false); + } + + // In javascript there's _verifyExclusive and _verifyShared based on changeset.upgraded, but + // here we do only one. _verifyShared groups together many subsequent changesets into a single + // oplog push, and then flushes in the end only for the whole group. + let bitfield_update: Option = if let Some(block) = &proof.block.as_ref() { + let byte_offset = + match self + .tree + .byte_offset_in_changeset(block.index, &changeset, None)? + { + Either::Right(value) => value, + Either::Left(instructions) => { + let infos = self.storage.read_infos_to_vec(&instructions).await?; + match self.tree.byte_offset_in_changeset( + block.index, + &changeset, + Some(&infos), + )? { + Either::Right(value) => value, + Either::Left(_) => { + return Err(HypercoreError::InvalidOperation { + context: format!( + "Could not read offset for index {} from tree", + block.index + ), + }); + } + } + } + }; + + // Write the value to the block store + let info_to_flush = self.block_store.put(&block.value, byte_offset); + self.storage.flush_info(info_to_flush).await?; + + // Return a bitfield update for the given value + Some(BitfieldUpdate { + drop: false, + start: block.index, + length: 1, + }) + } else { + // Only from DataBlock can there be changes to the bitfield + None + }; + + // Append the changeset to the Oplog + let outcome = self.oplog.append_changeset( + &changeset, + bitfield_update.clone(), + false, + &self.header, + )?; + self.storage.flush_infos(&outcome.infos_to_flush).await?; + self.header = outcome.header; + + if let Some(bitfield_update) = bitfield_update { + // Write to bitfield + self.bitfield.update(&bitfield_update); + + // Contiguous length is known only now + update_contiguous_length(&mut 
self.header, &self.bitfield, &bitfield_update); + } + + // Commit changeset to in-memory tree + self.tree.commit(changeset)?; + + // Now ready to flush + if self.should_flush_bitfield_and_tree_and_oplog() { + self.flush_bitfield_and_tree_and_oplog(false).await?; + } + Ok(true) + } + + /// Used to fill the nodes field of a `RequestBlock` during + /// synchronization. + #[instrument(err, skip(self))] + pub async fn missing_nodes(&mut self, index: u64) -> Result { + self.missing_nodes_from_merkle_tree_index(index * 2).await + } + + /// Get missing nodes using a merkle tree index. Advanced variant of missing_nodex + /// that allow for special cases of searching directly from the merkle tree. + #[instrument(err, skip(self))] + pub async fn missing_nodes_from_merkle_tree_index( + &mut self, + merkle_tree_index: u64, + ) -> Result { + match self.tree.missing_nodes(merkle_tree_index, None)? { + Either::Right(value) => Ok(value), + Either::Left(instructions) => { + let mut instructions = instructions; + let mut infos: Vec = vec![]; + loop { + infos.extend(self.storage.read_infos_to_vec(&instructions).await?); + match self.tree.missing_nodes(merkle_tree_index, Some(&infos))? { + Either::Right(value) => { + return Ok(value); + } + Either::Left(new_instructions) => { + instructions = new_instructions; + } + } + } + } + } + } + + /// Makes the hypercore read-only by deleting the secret key. Returns true if the + /// hypercore was changed, false if the hypercore was already read-only. This is useful + /// in scenarios where a hypercore should be made immutable after initial values have + /// been stored. 
+ #[instrument(err, skip_all)] + pub async fn make_read_only(&mut self) -> Result { + if self.key_pair.secret.is_some() { + self.key_pair.secret = None; + self.header.key_pair.secret = None; + // Need to flush clearing traces to make sure both oplog slots are cleared + self.flush_bitfield_and_tree_and_oplog(true).await?; + Ok(true) + } else { + Ok(false) + } + } + + async fn byte_range( + &mut self, + index: u64, + initial_infos: Option<&[StoreInfo]>, + ) -> Result { + match self.tree.byte_range(index, initial_infos)? { + Either::Right(value) => Ok(value), + Either::Left(instructions) => { + let mut instructions = instructions; + let mut infos: Vec = vec![]; + loop { + infos.extend(self.storage.read_infos_to_vec(&instructions).await?); + match self.tree.byte_range(index, Some(&infos))? { + Either::Right(value) => { + return Ok(value); + } + Either::Left(new_instructions) => { + instructions = new_instructions; + } + } + } + } + } + } + + async fn create_valueless_proof( + &mut self, + block: Option, + hash: Option, + seek: Option, + upgrade: Option, + ) -> Result { + match self.tree.create_valueless_proof( + block.as_ref(), + hash.as_ref(), + seek.as_ref(), + upgrade.as_ref(), + None, + )? { + Either::Right(value) => Ok(value), + Either::Left(instructions) => { + let mut instructions = instructions; + let mut infos: Vec = vec![]; + loop { + infos.extend(self.storage.read_infos_to_vec(&instructions).await?); + match self.tree.create_valueless_proof( + block.as_ref(), + hash.as_ref(), + seek.as_ref(), + upgrade.as_ref(), + Some(&infos), + )? { + Either::Right(value) => { + return Ok(value); + } + Either::Left(new_instructions) => { + instructions = new_instructions; + } + } + } + } + } + } + + /// Verify a proof received from a peer. Returns a changeset that should be + /// applied. + async fn verify_proof(&mut self, proof: &Proof) -> Result { + match self.tree.verify_proof(proof, &self.key_pair.public, None)? 
{ + Either::Right(value) => Ok(value), + Either::Left(instructions) => { + let infos = self.storage.read_infos_to_vec(&instructions).await?; + match self + .tree + .verify_proof(proof, &self.key_pair.public, Some(&infos))? + { + Either::Right(value) => Ok(value), + Either::Left(_) => Err(HypercoreError::InvalidOperation { + context: "Could not verify proof from tree".to_string(), + }), + } + } + } + } + + fn should_flush_bitfield_and_tree_and_oplog(&mut self) -> bool { + if self.skip_flush_count == 0 + || self.oplog.entries_byte_length >= MAX_OPLOG_ENTRIES_BYTE_SIZE + { + self.skip_flush_count = 3; + true + } else { + self.skip_flush_count -= 1; + false + } + } + + async fn flush_bitfield_and_tree_and_oplog( + &mut self, + clear_traces: bool, + ) -> Result<(), HypercoreError> { + let infos = self.bitfield.flush(); + self.storage.flush_infos(&infos).await?; + let infos = self.tree.flush(); + self.storage.flush_infos(&infos).await?; + let infos = self.oplog.flush(&self.header, clear_traces)?; + self.storage.flush_infos(&infos).await?; + Ok(()) + } +} + +fn update_contiguous_length( + header: &mut Header, + bitfield: &Bitfield, + bitfield_update: &BitfieldUpdate, +) { + let end = bitfield_update.start + bitfield_update.length; + let mut c = header.hints.contiguous_length; + if bitfield_update.drop { + if c <= end && c > bitfield_update.start { + c = bitfield_update.start; + } + } else if c <= end && c >= bitfield_update.start { + c = end; + while bitfield.get(c) { + c += 1; + } + } + + if c != header.hints.contiguous_length { + header.hints.contiguous_length = c; + } +} + +#[cfg(test)] +mod tests { + use super::*; + use random_access_memory::RandomAccessMemory; + + #[async_std::test] + async fn core_create_proof_block_only() -> Result<(), HypercoreError> { + let mut hypercore = create_hypercore_with_data(10).await?; + + let proof = hypercore + .create_proof(Some(RequestBlock { index: 4, nodes: 2 }), None, None, None) + .await? 
+ .unwrap(); + let block = proof.block.unwrap(); + assert_eq!(proof.upgrade, None); + assert_eq!(proof.seek, None); + assert_eq!(block.index, 4); + assert_eq!(block.nodes.len(), 2); + assert_eq!(block.nodes[0].index, 10); + assert_eq!(block.nodes[1].index, 13); + Ok(()) + } + + #[async_std::test] + async fn core_create_proof_block_and_upgrade() -> Result<(), HypercoreError> { + let mut hypercore = create_hypercore_with_data(10).await?; + let proof = hypercore + .create_proof( + Some(RequestBlock { index: 4, nodes: 0 }), + None, + None, + Some(RequestUpgrade { + start: 0, + length: 10, + }), + ) + .await? + .unwrap(); + let block = proof.block.unwrap(); + let upgrade = proof.upgrade.unwrap(); + assert_eq!(proof.seek, None); + assert_eq!(block.index, 4); + assert_eq!(block.nodes.len(), 3); + assert_eq!(block.nodes[0].index, 10); + assert_eq!(block.nodes[1].index, 13); + assert_eq!(block.nodes[2].index, 3); + assert_eq!(upgrade.start, 0); + assert_eq!(upgrade.length, 10); + assert_eq!(upgrade.nodes.len(), 1); + assert_eq!(upgrade.nodes[0].index, 17); + assert_eq!(upgrade.additional_nodes.len(), 0); + Ok(()) + } + + #[async_std::test] + async fn core_create_proof_block_and_upgrade_and_additional() -> Result<(), HypercoreError> { + let mut hypercore = create_hypercore_with_data(10).await?; + let proof = hypercore + .create_proof( + Some(RequestBlock { index: 4, nodes: 0 }), + None, + None, + Some(RequestUpgrade { + start: 0, + length: 8, + }), + ) + .await? 
+ .unwrap(); + let block = proof.block.unwrap(); + let upgrade = proof.upgrade.unwrap(); + assert_eq!(proof.seek, None); + assert_eq!(block.index, 4); + assert_eq!(block.nodes.len(), 3); + assert_eq!(block.nodes[0].index, 10); + assert_eq!(block.nodes[1].index, 13); + assert_eq!(block.nodes[2].index, 3); + assert_eq!(upgrade.start, 0); + assert_eq!(upgrade.length, 8); + assert_eq!(upgrade.nodes.len(), 0); + assert_eq!(upgrade.additional_nodes.len(), 1); + assert_eq!(upgrade.additional_nodes[0].index, 17); + Ok(()) + } + + #[async_std::test] + async fn core_create_proof_block_and_upgrade_from_existing_state() -> Result<(), HypercoreError> + { + let mut hypercore = create_hypercore_with_data(10).await?; + let proof = hypercore + .create_proof( + Some(RequestBlock { index: 1, nodes: 0 }), + None, + None, + Some(RequestUpgrade { + start: 1, + length: 9, + }), + ) + .await? + .unwrap(); + let block = proof.block.unwrap(); + let upgrade = proof.upgrade.unwrap(); + assert_eq!(proof.seek, None); + assert_eq!(block.index, 1); + assert_eq!(block.nodes.len(), 0); + assert_eq!(upgrade.start, 1); + assert_eq!(upgrade.length, 9); + assert_eq!(upgrade.nodes.len(), 3); + assert_eq!(upgrade.nodes[0].index, 5); + assert_eq!(upgrade.nodes[1].index, 11); + assert_eq!(upgrade.nodes[2].index, 17); + assert_eq!(upgrade.additional_nodes.len(), 0); + Ok(()) + } + + #[async_std::test] + async fn core_create_proof_block_and_upgrade_from_existing_state_with_additional( + ) -> Result<(), HypercoreError> { + let mut hypercore = create_hypercore_with_data(10).await?; + let proof = hypercore + .create_proof( + Some(RequestBlock { index: 1, nodes: 0 }), + None, + None, + Some(RequestUpgrade { + start: 1, + length: 5, + }), + ) + .await? 
+ .unwrap(); + let block = proof.block.unwrap(); + let upgrade = proof.upgrade.unwrap(); + assert_eq!(proof.seek, None); + assert_eq!(block.index, 1); + assert_eq!(block.nodes.len(), 0); + assert_eq!(upgrade.start, 1); + assert_eq!(upgrade.length, 5); + assert_eq!(upgrade.nodes.len(), 2); + assert_eq!(upgrade.nodes[0].index, 5); + assert_eq!(upgrade.nodes[1].index, 9); + assert_eq!(upgrade.additional_nodes.len(), 2); + assert_eq!(upgrade.additional_nodes[0].index, 13); + assert_eq!(upgrade.additional_nodes[1].index, 17); + Ok(()) + } + + #[async_std::test] + async fn core_create_proof_block_and_seek_1_no_upgrade() -> Result<(), HypercoreError> { + let mut hypercore = create_hypercore_with_data(10).await?; + let proof = hypercore + .create_proof( + Some(RequestBlock { index: 4, nodes: 2 }), + None, + Some(RequestSeek { bytes: 8 }), + None, + ) + .await? + .unwrap(); + let block = proof.block.unwrap(); + assert_eq!(proof.seek, None); // seek included in block + assert_eq!(proof.upgrade, None); + assert_eq!(block.index, 4); + assert_eq!(block.nodes.len(), 2); + assert_eq!(block.nodes[0].index, 10); + assert_eq!(block.nodes[1].index, 13); + Ok(()) + } + + #[async_std::test] + async fn core_create_proof_block_and_seek_2_no_upgrade() -> Result<(), HypercoreError> { + let mut hypercore = create_hypercore_with_data(10).await?; + let proof = hypercore + .create_proof( + Some(RequestBlock { index: 4, nodes: 2 }), + None, + Some(RequestSeek { bytes: 10 }), + None, + ) + .await? 
+ .unwrap(); + let block = proof.block.unwrap(); + assert_eq!(proof.seek, None); // seek included in block + assert_eq!(proof.upgrade, None); + assert_eq!(block.index, 4); + assert_eq!(block.nodes.len(), 2); + assert_eq!(block.nodes[0].index, 10); + assert_eq!(block.nodes[1].index, 13); + Ok(()) + } + + #[async_std::test] + async fn core_create_proof_block_and_seek_3_no_upgrade() -> Result<(), HypercoreError> { + let mut hypercore = create_hypercore_with_data(10).await?; + let proof = hypercore + .create_proof( + Some(RequestBlock { index: 4, nodes: 2 }), + None, + Some(RequestSeek { bytes: 13 }), + None, + ) + .await? + .unwrap(); + let block = proof.block.unwrap(); + let seek = proof.seek.unwrap(); + assert_eq!(proof.upgrade, None); + assert_eq!(block.index, 4); + assert_eq!(block.nodes.len(), 1); + assert_eq!(block.nodes[0].index, 10); + assert_eq!(seek.nodes.len(), 2); + assert_eq!(seek.nodes[0].index, 12); + assert_eq!(seek.nodes[1].index, 14); + Ok(()) + } + + #[async_std::test] + async fn core_create_proof_block_and_seek_to_tree_no_upgrade() -> Result<(), HypercoreError> { + let mut hypercore = create_hypercore_with_data(16).await?; + let proof = hypercore + .create_proof( + Some(RequestBlock { index: 0, nodes: 4 }), + None, + Some(RequestSeek { bytes: 26 }), + None, + ) + .await? 
+ .unwrap(); + let block = proof.block.unwrap(); + let seek = proof.seek.unwrap(); + assert_eq!(proof.upgrade, None); + assert_eq!(block.nodes.len(), 3); + assert_eq!(block.nodes[0].index, 2); + assert_eq!(block.nodes[1].index, 5); + assert_eq!(block.nodes[2].index, 11); + assert_eq!(seek.nodes.len(), 2); + assert_eq!(seek.nodes[0].index, 19); + assert_eq!(seek.nodes[1].index, 27); + Ok(()) + } + + #[async_std::test] + async fn core_create_proof_block_and_seek_with_upgrade() -> Result<(), HypercoreError> { + let mut hypercore = create_hypercore_with_data(10).await?; + let proof = hypercore + .create_proof( + Some(RequestBlock { index: 4, nodes: 2 }), + None, + Some(RequestSeek { bytes: 13 }), + Some(RequestUpgrade { + start: 8, + length: 2, + }), + ) + .await? + .unwrap(); + let block = proof.block.unwrap(); + let seek = proof.seek.unwrap(); + let upgrade = proof.upgrade.unwrap(); + assert_eq!(block.index, 4); + assert_eq!(block.nodes.len(), 1); + assert_eq!(block.nodes[0].index, 10); + assert_eq!(seek.nodes.len(), 2); + assert_eq!(seek.nodes[0].index, 12); + assert_eq!(seek.nodes[1].index, 14); + assert_eq!(upgrade.nodes.len(), 1); + assert_eq!(upgrade.nodes[0].index, 17); + assert_eq!(upgrade.additional_nodes.len(), 0); + Ok(()) + } + + #[async_std::test] + async fn core_create_proof_seek_with_upgrade() -> Result<(), HypercoreError> { + let mut hypercore = create_hypercore_with_data(10).await?; + let proof = hypercore + .create_proof( + None, + None, + Some(RequestSeek { bytes: 13 }), + Some(RequestUpgrade { + start: 0, + length: 10, + }), + ) + .await? 
+ .unwrap(); + let seek = proof.seek.unwrap(); + let upgrade = proof.upgrade.unwrap(); + assert_eq!(proof.block, None); + assert_eq!(seek.nodes.len(), 4); + assert_eq!(seek.nodes[0].index, 12); + assert_eq!(seek.nodes[1].index, 14); + assert_eq!(seek.nodes[2].index, 9); + assert_eq!(seek.nodes[3].index, 3); + assert_eq!(upgrade.nodes.len(), 1); + assert_eq!(upgrade.nodes[0].index, 17); + assert_eq!(upgrade.additional_nodes.len(), 0); + Ok(()) + } + + #[async_std::test] + async fn core_verify_proof_invalid_signature() -> Result<(), HypercoreError> { + let mut hypercore = create_hypercore_with_data(10).await?; + // Invalid clone hypercore with a different public key + let mut hypercore_clone = create_hypercore_with_data(0).await?; + let proof = hypercore + .create_proof( + None, + Some(RequestBlock { index: 6, nodes: 0 }), + None, + Some(RequestUpgrade { + start: 0, + length: 10, + }), + ) + .await? + .unwrap(); + assert!(hypercore_clone + .verify_and_apply_proof(&proof) + .await + .is_err()); + Ok(()) + } + + #[async_std::test] + async fn core_verify_and_apply_proof() -> Result<(), HypercoreError> { + let mut main = create_hypercore_with_data(10).await?; + let mut clone = create_hypercore_with_data_and_key_pair( + 0, + PartialKeypair { + public: main.key_pair.public, + secret: None, + }, + ) + .await?; + let index = 6; + let nodes = clone.missing_nodes(index).await?; + let proof = main + .create_proof( + None, + Some(RequestBlock { index, nodes }), + None, + Some(RequestUpgrade { + start: 0, + length: 10, + }), + ) + .await? 
+ .unwrap(); + assert!(clone.verify_and_apply_proof(&proof).await?); + let main_info = main.info(); + let clone_info = clone.info(); + assert_eq!(main_info.byte_length, clone_info.byte_length); + assert_eq!(main_info.length, clone_info.length); + assert!(main.get(6).await?.is_some()); + assert!(clone.get(6).await?.is_none()); + + // Fetch data for index 6 and verify it is found + let index = 6; + let nodes = clone.missing_nodes(index).await?; + let proof = main + .create_proof(Some(RequestBlock { index, nodes }), None, None, None) + .await? + .unwrap(); + assert!(clone.verify_and_apply_proof(&proof).await?); + Ok(()) + } + + async fn create_hypercore_with_data( + length: u64, + ) -> Result, HypercoreError> { + let signing_key = generate_signing_key(); + create_hypercore_with_data_and_key_pair( + length, + PartialKeypair { + public: signing_key.verifying_key(), + secret: Some(signing_key), + }, + ) + .await + } + + async fn create_hypercore_with_data_and_key_pair( + length: u64, + key_pair: PartialKeypair, + ) -> Result, HypercoreError> { + let storage = Storage::new_memory().await?; + let mut hypercore = Hypercore::new( + storage, + HypercoreOptions { + key_pair: Some(key_pair), + open: false, + #[cfg(feature = "cache")] + node_cache_options: None, + }, + ) + .await?; + for i in 0..length { + hypercore.append(format!("#{}", i).as_bytes()).await?; + } + Ok(hypercore) + } +} diff --git a/src/crypto/hash.rs b/src/crypto/hash.rs index dc8ba2a8..f744048d 100644 --- a/src/crypto/hash.rs +++ b/src/crypto/hash.rs @@ -1,34 +1,58 @@ -pub use blake2_rfc::blake2b::Blake2bResult; - -use crate::storage::Node; -use blake2_rfc::blake2b::Blake2b; +use blake2::{ + digest::{generic_array::GenericArray, typenum::U32, FixedOutput}, + Blake2b, Blake2bMac, Digest, +}; use byteorder::{BigEndian, WriteBytesExt}; -use ed25519_dalek::PublicKey; +use compact_encoding::State; +use ed25519_dalek::VerifyingKey; use merkle_tree_stream::Node as NodeTrait; use std::convert::AsRef; use std::mem; use 
std::ops::{Deref, DerefMut}; +use crate::common::Node; + // https://en.wikipedia.org/wiki/Merkle_tree#Second_preimage_attack const LEAF_TYPE: [u8; 1] = [0x00]; const PARENT_TYPE: [u8; 1] = [0x01]; const ROOT_TYPE: [u8; 1] = [0x02]; const HYPERCORE: [u8; 9] = *b"hypercore"; +// These the output of, see `hash_namespace` test below for how they are produced +// https://github.com/holepunchto/hypercore/blob/cf08b72f14ed7d9ef6d497ebb3071ee0ae20967e/lib/caps.js#L16 +const TREE: [u8; 32] = [ + 0x9F, 0xAC, 0x70, 0xB5, 0xC, 0xA1, 0x4E, 0xFC, 0x4E, 0x91, 0xC8, 0x33, 0xB2, 0x4, 0xE7, 0x5B, + 0x8B, 0x5A, 0xAD, 0x8B, 0x58, 0x81, 0xBF, 0xC0, 0xAD, 0xB5, 0xEF, 0x38, 0xA3, 0x27, 0x5B, 0x9C, +]; + +// const DEFAULT_NAMESPACE: [u8; 32] = [ +// 0x41, 0x44, 0xEE, 0xA5, 0x31, 0xE4, 0x83, 0xD5, 0x4E, 0x0C, 0x14, 0xF4, 0xCA, 0x68, 0xE0, 0x64, +// 0x4F, 0x35, 0x53, 0x43, 0xFF, 0x6F, 0xCB, 0x0F, 0x00, 0x52, 0x00, 0xE1, 0x2C, 0xD7, 0x47, 0xCB, +// ]; + +// const MANIFEST: [u8; 32] = [ +// 0xE6, 0x4B, 0x71, 0x08, 0xEA, 0xCC, 0xE4, 0x7C, 0xFC, 0x61, 0xAC, 0x85, 0x05, 0x68, 0xF5, 0x5F, +// 0x8B, 0x15, 0xB8, 0x2E, 0xC5, 0xED, 0x78, 0xC4, 0xEC, 0x59, 0x7B, 0x03, 0x6E, 0x2A, 0x14, 0x98, +// ]; + +pub(crate) type Blake2bResult = GenericArray; +type Blake2b256 = Blake2b; + /// `BLAKE2b` hash. #[derive(Debug, Clone, PartialEq)] -pub struct Hash { +pub(crate) struct Hash { hash: Blake2bResult, } impl Hash { /// Hash a `Leaf` node. - pub fn from_leaf(data: &[u8]) -> Self { + #[allow(dead_code)] + pub(crate) fn from_leaf(data: &[u8]) -> Self { let size = u64_as_be(data.len() as u64); - let mut hasher = Blake2b::new(32); - hasher.update(&LEAF_TYPE); - hasher.update(&size); + let mut hasher = Blake2b256::new(); + hasher.update(LEAF_TYPE); + hasher.update(size); hasher.update(data); Self { @@ -37,18 +61,19 @@ impl Hash { } /// Hash two `Leaf` nodes hashes together to form a `Parent` hash. 
- pub fn from_hashes(left: &Node, right: &Node) -> Self { + #[allow(dead_code)] + pub(crate) fn from_hashes(left: &Node, right: &Node) -> Self { let (node1, node2) = if left.index <= right.index { (left, right) } else { (right, left) }; - let size = u64_as_be((node1.length + node2.length) as u64); + let size = u64_as_be(node1.length + node2.length); - let mut hasher = Blake2b::new(32); - hasher.update(&PARENT_TYPE); - hasher.update(&size); + let mut hasher = Blake2b256::new(); + hasher.update(PARENT_TYPE); + hasher.update(size); hasher.update(node1.hash()); hasher.update(node2.hash()); @@ -59,25 +84,28 @@ impl Hash { /// Hash a public key. Useful to find the key you're looking for on a public /// network without leaking the key itself. - pub fn for_discovery_key(public_key: PublicKey) -> Self { - let mut hasher = Blake2b::with_key(32, public_key.as_bytes()); - hasher.update(&HYPERCORE); + #[allow(dead_code)] + pub(crate) fn for_discovery_key(public_key: VerifyingKey) -> Self { + let mut hasher = + Blake2bMac::::new_with_salt_and_personal(public_key.as_bytes(), &[], &[]).unwrap(); + blake2::digest::Update::update(&mut hasher, &HYPERCORE); Self { - hash: hasher.finalize(), + hash: hasher.finalize_fixed(), } } /// Hash a vector of `Root` nodes. // Called `crypto.tree()` in the JS implementation. - pub fn from_roots(roots: &[impl AsRef]) -> Self { - let mut hasher = Blake2b::new(32); - hasher.update(&ROOT_TYPE); + #[allow(dead_code)] + pub(crate) fn from_roots(roots: &[impl AsRef]) -> Self { + let mut hasher = Blake2b256::new(); + hasher.update(ROOT_TYPE); for node in roots { let node = node.as_ref(); hasher.update(node.hash()); - hasher.update(&u64_as_be((node.index()) as u64)); - hasher.update(&u64_as_be((node.len()) as u64)); + hasher.update(u64_as_be(node.index())); + hasher.update(u64_as_be(node.len())); } Self { @@ -86,8 +114,78 @@ impl Hash { } /// Returns a byte slice of this `Hash`'s contents. 
- pub fn as_bytes(&self) -> &[u8] { - self.hash.as_bytes() + pub(crate) fn as_bytes(&self) -> &[u8] { + self.hash.as_slice() + } + + // NB: The following methods mirror Javascript naming in + // https://github.com/mafintosh/hypercore-crypto/blob/master/index.js + // for v10 that use LE bytes. + + /// Hash data + pub(crate) fn data(data: &[u8]) -> Self { + let (mut state, mut size) = State::new_with_size(8); + state + .encode_u64(data.len() as u64, &mut size) + .expect("Encoding u64 should not fail"); + + let mut hasher = Blake2b256::new(); + hasher.update(LEAF_TYPE); + hasher.update(&size); + hasher.update(data); + + Self { + hash: hasher.finalize(), + } + } + + /// Hash a parent + pub(crate) fn parent(left: &Node, right: &Node) -> Self { + let (node1, node2) = if left.index <= right.index { + (left, right) + } else { + (right, left) + }; + + let (mut state, mut size) = State::new_with_size(8); + state + .encode_u64(node1.length + node2.length, &mut size) + .expect("Encoding u64 should not fail"); + + let mut hasher = Blake2b256::new(); + hasher.update(PARENT_TYPE); + hasher.update(&size); + hasher.update(node1.hash()); + hasher.update(node2.hash()); + + Self { + hash: hasher.finalize(), + } + } + + /// Hash a tree + pub(crate) fn tree(roots: &[impl AsRef]) -> Self { + let mut hasher = Blake2b256::new(); + hasher.update(ROOT_TYPE); + + for node in roots { + let node = node.as_ref(); + let (mut state, mut buffer) = State::new_with_size(16); + state + .encode_u64(node.index(), &mut buffer) + .expect("Encoding u64 should not fail"); + state + .encode_u64(node.len(), &mut buffer) + .expect("Encoding u64 should not fail"); + + hasher.update(node.hash()); + hasher.update(&buffer[..8]); + hasher.update(&buffer[8..]); + } + + Self { + hash: hasher.finalize(), + } } } @@ -111,6 +209,25 @@ impl DerefMut for Hash { } } +/// Create a signable buffer for tree. This is treeSignable in Javascript. 
+/// See https://github.com/hypercore-protocol/hypercore/blob/70b271643c4e4b1e5ecae5bb579966dfe6361ff3/lib/caps.js#L17 +pub(crate) fn signable_tree(hash: &[u8], length: u64, fork: u64) -> Box<[u8]> { + let (mut state, mut buffer) = State::new_with_size(80); + state + .encode_fixed_32(&TREE, &mut buffer) + .expect("Should be able "); + state + .encode_fixed_32(hash, &mut buffer) + .expect("Encoding fixed 32 bytes should not fail"); + state + .encode_u64(length, &mut buffer) + .expect("Encoding u64 should not fail"); + state + .encode_u64(fork, &mut buffer) + .expect("Encoding u64 should not fail"); + buffer +} + #[cfg(test)] mod tests { use super::*; @@ -118,6 +235,14 @@ mod tests { use self::data_encoding::HEXLOWER; use data_encoding; + fn hash_with_extra_byte(data: &[u8], byte: u8) -> Box<[u8]> { + let mut hasher = Blake2b256::new(); + hasher.update(data); + hasher.update([byte]); + let hash = hasher.finalize(); + hash.as_slice().into() + } + fn hex_bytes(hex: &str) -> Vec { HEXLOWER.decode(hex.as_bytes()).unwrap() } @@ -172,7 +297,7 @@ mod tests { #[test] fn discovery_key_hashing() -> Result<(), ed25519_dalek::SignatureError> { - let public_key = PublicKey::from_bytes(&[ + let public_key = VerifyingKey::from_bytes(&[ 119, 143, 141, 149, 81, 117, 201, 46, 76, 237, 94, 79, 85, 99, 246, 155, 254, 192, 200, 108, 198, 246, 112, 53, 44, 69, 121, 67, 102, 111, 230, 57, ])?; @@ -186,4 +311,52 @@ mod tests { Ok(()) } + + // The following uses test data from + // https://github.com/mafintosh/hypercore-crypto/blob/master/test.js + + #[test] + fn hash_leaf() { + let data = b"hello world"; + check_hash( + Hash::data(data), + "9f1b578fd57a4df015493d2886aec9600eef913c3bb009768c7f0fb875996308", + ); + } + + #[test] + fn hash_parent() { + let data = b"hello world"; + let len = data.len() as u64; + let node1 = Node::new(0, Hash::data(data).as_bytes().to_vec(), len); + let node2 = Node::new(1, Hash::data(data).as_bytes().to_vec(), len); + check_hash( + Hash::parent(&node1, &node2), 
+ "3ad0c9b58b771d1b7707e1430f37c23a23dd46e0c7c3ab9c16f79d25f7c36804", + ); + } + + #[test] + fn hash_tree() { + let hash: [u8; 32] = [0; 32]; + let node1 = Node::new(3, hash.to_vec(), 11); + let node2 = Node::new(9, hash.to_vec(), 2); + check_hash( + Hash::tree(&[&node1, &node2]), + "0e576a56b478cddb6ffebab8c494532b6de009466b2e9f7af9143fc54b9eaa36", + ); + } + + // This is the rust version from + // https://github.com/hypercore-protocol/hypercore/blob/70b271643c4e4b1e5ecae5bb579966dfe6361ff3/lib/caps.js + // and validates that our arrays match + #[test] + fn hash_namespace() { + let mut hasher = Blake2b256::new(); + hasher.update(HYPERCORE); + let hash = hasher.finalize(); + let ns = hash.as_slice(); + let tree: Box<[u8]> = { hash_with_extra_byte(ns, 0) }; + assert_eq!(tree, TREE.into()); + } } diff --git a/src/crypto/key_pair.rs b/src/crypto/key_pair.rs index 53503afb..683cb689 100644 --- a/src/crypto/key_pair.rs +++ b/src/crypto/key_pair.rs @@ -1,41 +1,57 @@ //! Generate an `Ed25519` keypair. -pub use ed25519_dalek::{ExpandedSecretKey, Keypair, PublicKey, SecretKey, Signature, Verifier}; +use ed25519_dalek::{Signature, Signer, SigningKey, Verifier, VerifyingKey}; +use rand::rngs::OsRng; -use anyhow::{bail, ensure, Result}; -use rand::rngs::{OsRng, StdRng}; -use rand::SeedableRng; +use crate::HypercoreError; + +/// Key pair where for read-only hypercores the secret key can also be missing. +#[derive(Debug, Clone)] +pub struct PartialKeypair { + /// Public key + pub public: VerifyingKey, + /// Secret key. If None, the hypercore is read-only. + pub secret: Option, +} /// Generate a new `Ed25519` key pair. -pub fn generate() -> Keypair { - let mut rng = StdRng::from_rng(OsRng::default()).unwrap(); - Keypair::generate(&mut rng) +pub fn generate() -> SigningKey { + let mut csprng = OsRng; + SigningKey::generate(&mut csprng) } /// Sign a byte slice using a keypair's private key. 
-pub fn sign(public_key: &PublicKey, secret: &SecretKey, msg: &[u8]) -> Signature { - ExpandedSecretKey::from(secret).sign(msg, public_key) +pub fn sign(signing_key: &SigningKey, msg: &[u8]) -> Signature { + signing_key.sign(msg) } /// Verify a signature on a message with a keypair's public key. -pub fn verify(public: &PublicKey, msg: &[u8], sig: Option<&Signature>) -> Result<()> { +pub fn verify( + public: &VerifyingKey, + msg: &[u8], + sig: Option<&Signature>, +) -> Result<(), HypercoreError> { match sig { - None => bail!("Signature verification failed"), + None => Err(HypercoreError::InvalidSignature { + context: "No signature provided.".to_string(), + }), Some(sig) => { - ensure!( - public.verify(msg, sig).is_ok(), - "Signature verification failed" - ); - Ok(()) + if public.verify(msg, sig).is_ok() { + Ok(()) + } else { + Err(HypercoreError::InvalidSignature { + context: "Signature could not be verified.".to_string(), + }) + } } } } #[test] fn can_verify_messages() { - let keypair = generate(); + let signing_key = generate(); let from = b"hello"; - let sig = sign(&keypair.public, &keypair.secret, from); - verify(&keypair.public, from, Some(&sig)).unwrap(); - verify(&keypair.public, b"oops", Some(&sig)).unwrap_err(); + let sig = sign(&signing_key, from); + verify(&signing_key.verifying_key(), from, Some(&sig)).unwrap(); + verify(&signing_key.verifying_key(), b"oops", Some(&sig)).unwrap_err(); } diff --git a/src/crypto/manifest.rs b/src/crypto/manifest.rs new file mode 100644 index 00000000..b3900c5a --- /dev/null +++ b/src/crypto/manifest.rs @@ -0,0 +1,43 @@ +// These the output of the following link: +// https://github.com/holepunchto/hypercore/blob/cf08b72f14ed7d9ef6d497ebb3071ee0ae20967e/lib/caps.js#L16 + +const DEFAULT_NAMESPACE: [u8; 32] = [ + 0x41, 0x44, 0xEE, 0xA5, 0x31, 0xE4, 0x83, 0xD5, 0x4E, 0x0C, 0x14, 0xF4, 0xCA, 0x68, 0xE0, 0x64, + 0x4F, 0x35, 0x53, 0x43, 0xFF, 0x6F, 0xCB, 0x0F, 0x00, 0x52, 0x00, 0xE1, 0x2C, 0xD7, 0x47, 0xCB, +]; + +// TODO: 
Eventually this would be used in manifestHash +// https://github.com/holepunchto/hypercore/blob/cf08b72f14ed7d9ef6d497ebb3071ee0ae20967e/lib/manifest.js#L211 +// +// const MANIFEST: [u8; 32] = [ +// 0xE6, 0x4B, 0x71, 0x08, 0xEA, 0xCC, 0xE4, 0x7C, 0xFC, 0x61, 0xAC, 0x85, 0x05, 0x68, 0xF5, 0x5F, +// 0x8B, 0x15, 0xB8, 0x2E, 0xC5, 0xED, 0x78, 0xC4, 0xEC, 0x59, 0x7B, 0x03, 0x6E, 0x2A, 0x14, 0x98, +// ]; + +#[derive(Debug, Clone)] +pub(crate) struct Manifest { + pub(crate) hash: String, + // TODO: In v11 can be static + // pub(crate) static_core: Option, + pub(crate) signer: ManifestSigner, + // TODO: In v11 can have multiple signers + // pub(crate) multiple_signers: Option, +} + +#[derive(Debug, Clone)] +pub(crate) struct ManifestSigner { + pub(crate) signature: String, + pub(crate) namespace: [u8; 32], + pub(crate) public_key: [u8; 32], +} + +pub(crate) fn default_signer_manifest(public_key: [u8; 32]) -> Manifest { + Manifest { + hash: "blake2b".to_string(), + signer: ManifestSigner { + signature: "ed25519".to_string(), + namespace: DEFAULT_NAMESPACE, + public_key, + }, + } +} diff --git a/src/crypto/merkle.rs b/src/crypto/merkle.rs deleted file mode 100644 index a55a7a6a..00000000 --- a/src/crypto/merkle.rs +++ /dev/null @@ -1,71 +0,0 @@ -use crate::crypto::Hash; -use crate::storage::Node; -use merkle_tree_stream::{HashMethods, MerkleTreeStream, NodeKind, PartialNode}; -use std::sync::Arc; - -#[derive(Debug)] -struct H; - -impl HashMethods for H { - type Node = Node; - type Hash = Hash; - - fn leaf(&self, leaf: &PartialNode, _roots: &[Arc]) -> Self::Hash { - match leaf.data() { - NodeKind::Leaf(data) => Hash::from_leaf(&data), - NodeKind::Parent => unreachable!(), - } - } - - fn parent(&self, left: &Self::Node, right: &Self::Node) -> Self::Hash { - Hash::from_hashes(left, right) - } -} - -/// Merkle Tree Stream -#[derive(Debug)] -pub struct Merkle { - stream: MerkleTreeStream, - nodes: Vec>, -} - -impl Default for Merkle { - fn default() -> Self { - Merkle::new() - } 
-} - -impl Merkle { - /// Create a new instance. - // TODO: figure out the right allocation size for `roots` and `nodes`. - pub fn new() -> Self { - Self { - nodes: vec![], - stream: MerkleTreeStream::new(H, vec![]), - } - } - - pub fn from_nodes(nodes: Vec) -> Self { - let nodes = nodes.into_iter().map(Arc::new).collect::>(); - Self { - stream: MerkleTreeStream::new(H, nodes.clone()), - nodes, - } - } - - /// Access the next item. - // TODO: remove extra conversion alloc. - pub fn next(&mut self, data: &[u8]) { - self.stream.next(&data, &mut self.nodes); - } - - /// Get the roots vector. - pub fn roots(&self) -> &Vec> { - self.stream.roots() - } - - /// Get the nodes from the struct. - pub fn nodes(&self) -> &Vec> { - &self.nodes - } -} diff --git a/src/crypto/mod.rs b/src/crypto/mod.rs index e6c10c65..1bf2ab5b 100644 --- a/src/crypto/mod.rs +++ b/src/crypto/mod.rs @@ -2,10 +2,8 @@ mod hash; mod key_pair; -mod merkle; +mod manifest; -pub use self::hash::Hash; -pub use self::key_pair::{ - generate as generate_keypair, sign, verify, PublicKey, SecretKey, Signature, -}; -pub use self::merkle::Merkle; +pub(crate) use hash::{signable_tree, Hash}; +pub use key_pair::{generate as generate_signing_key, sign, verify, PartialKeypair}; +pub(crate) use manifest::{default_signer_manifest, Manifest, ManifestSigner}; diff --git a/src/crypto/root.rs b/src/crypto/root.rs deleted file mode 100644 index 12a6713d..00000000 --- a/src/crypto/root.rs +++ /dev/null @@ -1,52 +0,0 @@ -//! Root node type. Functions as an intermediate type for hash methods that -//! operate on Root. -//! -//! ## Why? -//! Both `merkle-tree-stream` and `hypercore` have `Node` types. Even if in most -//! cases these types don't overlap, in a select few cases both need to be -//! passed to the same function. So in order to facilitate that, the `Root` type -//! is created. It's entirely borrowed, and allows passing either type down into -//! a function that accepts `Root`. - -/// Root node found in flat-tree. 
-pub struct Root<'a> { - index: &'a u64, - length: &'a u64, - hash: &'a [u8], -} - -impl<'a> Root<'a> { - /// Create a new instance. - #[inline] - pub fn new(index: &'a u64, length: &'a u64, hash: &'a [u8]) -> Self { - Self { - index, - length, - hash, - } - } - - /// Get the index at which this root was found inside a `flat-tree`. - #[inline] - pub fn index(&self) -> &u64 { - &self.index - } - - /// Get the lenght of the data. - #[inline] - pub fn len(&self) -> &u64 { - &self.length - } - - /// Check if the content is empty. - #[inline] - pub fn is_empty(&self) -> bool { - *self.length == 0 - } - - /// Get the hash. - #[inline] - pub fn hash(&self) -> &'a [u8] { - &self.hash - } -} diff --git a/src/data/mod.rs b/src/data/mod.rs new file mode 100644 index 00000000..fa70a904 --- /dev/null +++ b/src/data/mod.rs @@ -0,0 +1,46 @@ +use crate::common::{NodeByteRange, Store, StoreInfo, StoreInfoInstruction}; +use futures::future::Either; + +/// Block store +#[derive(Debug, Default)] +pub(crate) struct BlockStore {} + +impl BlockStore { + pub(crate) fn append_batch, B: AsRef<[A]>>( + &self, + batch: B, + batch_length: usize, + byte_length: u64, + ) -> StoreInfo { + let mut buffer: Vec = Vec::with_capacity(batch_length); + for data in batch.as_ref().iter() { + buffer.extend_from_slice(data.as_ref()); + } + StoreInfo::new_content(Store::Data, byte_length, &buffer) + } + + pub(crate) fn put(&self, value: &[u8], offset: u64) -> StoreInfo { + StoreInfo::new_content(Store::Data, offset, value) + } + + pub(crate) fn read( + &self, + byte_range: &NodeByteRange, + info: Option, + ) -> Either> { + if let Some(info) = info { + Either::Right(info.data.unwrap()) + } else { + Either::Left(StoreInfoInstruction::new_content( + Store::Data, + byte_range.index, + byte_range.length, + )) + } + } + + /// Clears a segment, returns infos to write to storage. 
+ pub(crate) fn clear(&mut self, start: u64, length: u64) -> StoreInfo { + StoreInfo::new_delete(Store::Data, start, length) + } +} diff --git a/src/encoding.rs b/src/encoding.rs new file mode 100644 index 00000000..ed049a65 --- /dev/null +++ b/src/encoding.rs @@ -0,0 +1,370 @@ +//! Hypercore-specific compact encodings +pub use compact_encoding::{CompactEncoding, EncodingError, EncodingErrorKind, State}; +use std::convert::TryInto; +use std::ops::{Deref, DerefMut}; + +use crate::{ + crypto::{Manifest, ManifestSigner}, + DataBlock, DataHash, DataSeek, DataUpgrade, Node, RequestBlock, RequestSeek, RequestUpgrade, +}; + +#[derive(Debug, Clone)] +/// Wrapper struct for compact_encoding::State +pub struct HypercoreState(pub State); + +impl Default for HypercoreState { + /// Passthrought to compact_encoding + fn default() -> Self { + Self::new() + } +} + +impl HypercoreState { + /// Passthrought to compact_encoding + pub fn new() -> HypercoreState { + HypercoreState(State::new()) + } + + /// Passthrought to compact_encoding + pub fn new_with_size(size: usize) -> (HypercoreState, Box<[u8]>) { + let (state, buffer) = State::new_with_size(size); + (HypercoreState(state), buffer) + } + + /// Passthrought to compact_encoding + pub fn new_with_start_and_end(start: usize, end: usize) -> HypercoreState { + HypercoreState(State::new_with_start_and_end(start, end)) + } + + /// Passthrought to compact_encoding + pub fn from_buffer(buffer: &[u8]) -> HypercoreState { + HypercoreState(State::from_buffer(buffer)) + } +} + +impl Deref for HypercoreState { + type Target = State; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for HypercoreState { + fn deref_mut(&mut self) -> &mut State { + &mut self.0 + } +} + +impl CompactEncoding for HypercoreState { + fn preencode(&mut self, value: &Node) -> Result { + self.0.preencode(&value.index)?; + self.0.preencode(&value.length)?; + self.0.preencode_fixed_32() + } + + fn encode(&mut self, value: &Node, buffer: &mut [u8]) 
-> Result { + self.0.encode(&value.index, buffer)?; + self.0.encode(&value.length, buffer)?; + self.0.encode_fixed_32(&value.hash, buffer) + } + + fn decode(&mut self, buffer: &[u8]) -> Result { + let index: u64 = self.0.decode(buffer)?; + let length: u64 = self.0.decode(buffer)?; + let hash: Box<[u8]> = self.0.decode_fixed_32(buffer)?; + Ok(Node::new(index, hash.to_vec(), length)) + } +} + +impl CompactEncoding> for HypercoreState { + fn preencode(&mut self, value: &Vec) -> Result { + let len = value.len(); + self.0.preencode(&len)?; + for val in value { + self.preencode(val)?; + } + Ok(self.end()) + } + + fn encode(&mut self, value: &Vec, buffer: &mut [u8]) -> Result { + let len = value.len(); + self.0.encode(&len, buffer)?; + for val in value { + self.encode(val, buffer)?; + } + Ok(self.start()) + } + + fn decode(&mut self, buffer: &[u8]) -> Result, EncodingError> { + let len: usize = self.0.decode(buffer)?; + let mut value = Vec::with_capacity(len); + for _ in 0..len { + value.push(self.decode(buffer)?); + } + Ok(value) + } +} + +impl CompactEncoding for HypercoreState { + fn preencode(&mut self, value: &RequestBlock) -> Result { + self.0.preencode(&value.index)?; + self.0.preencode(&value.nodes) + } + + fn encode(&mut self, value: &RequestBlock, buffer: &mut [u8]) -> Result { + self.0.encode(&value.index, buffer)?; + self.0.encode(&value.nodes, buffer) + } + + fn decode(&mut self, buffer: &[u8]) -> Result { + let index: u64 = self.0.decode(buffer)?; + let nodes: u64 = self.0.decode(buffer)?; + Ok(RequestBlock { index, nodes }) + } +} + +impl CompactEncoding for HypercoreState { + fn preencode(&mut self, value: &RequestSeek) -> Result { + self.0.preencode(&value.bytes) + } + + fn encode(&mut self, value: &RequestSeek, buffer: &mut [u8]) -> Result { + self.0.encode(&value.bytes, buffer) + } + + fn decode(&mut self, buffer: &[u8]) -> Result { + let bytes: u64 = self.0.decode(buffer)?; + Ok(RequestSeek { bytes }) + } +} + +impl CompactEncoding for HypercoreState { 
+ fn preencode(&mut self, value: &RequestUpgrade) -> Result { + self.0.preencode(&value.start)?; + self.0.preencode(&value.length) + } + + fn encode( + &mut self, + value: &RequestUpgrade, + buffer: &mut [u8], + ) -> Result { + self.0.encode(&value.start, buffer)?; + self.0.encode(&value.length, buffer) + } + + fn decode(&mut self, buffer: &[u8]) -> Result { + let start: u64 = self.0.decode(buffer)?; + let length: u64 = self.0.decode(buffer)?; + Ok(RequestUpgrade { start, length }) + } +} + +impl CompactEncoding for HypercoreState { + fn preencode(&mut self, value: &DataBlock) -> Result { + self.0.preencode(&value.index)?; + self.0.preencode(&value.value)?; + self.preencode(&value.nodes) + } + + fn encode(&mut self, value: &DataBlock, buffer: &mut [u8]) -> Result { + self.0.encode(&value.index, buffer)?; + self.0.encode(&value.value, buffer)?; + self.encode(&value.nodes, buffer) + } + + fn decode(&mut self, buffer: &[u8]) -> Result { + let index: u64 = self.0.decode(buffer)?; + let value: Vec = self.0.decode(buffer)?; + let nodes: Vec = self.decode(buffer)?; + Ok(DataBlock { + index, + value, + nodes, + }) + } +} + +impl CompactEncoding for HypercoreState { + fn preencode(&mut self, value: &DataHash) -> Result { + self.0.preencode(&value.index)?; + self.preencode(&value.nodes) + } + + fn encode(&mut self, value: &DataHash, buffer: &mut [u8]) -> Result { + self.0.encode(&value.index, buffer)?; + self.encode(&value.nodes, buffer) + } + + fn decode(&mut self, buffer: &[u8]) -> Result { + let index: u64 = self.0.decode(buffer)?; + let nodes: Vec = self.decode(buffer)?; + Ok(DataHash { index, nodes }) + } +} + +impl CompactEncoding for HypercoreState { + fn preencode(&mut self, value: &DataSeek) -> Result { + self.0.preencode(&value.bytes)?; + self.preencode(&value.nodes) + } + + fn encode(&mut self, value: &DataSeek, buffer: &mut [u8]) -> Result { + self.0.encode(&value.bytes, buffer)?; + self.encode(&value.nodes, buffer) + } + + fn decode(&mut self, buffer: &[u8]) -> 
Result { + let bytes: u64 = self.0.decode(buffer)?; + let nodes: Vec = self.decode(buffer)?; + Ok(DataSeek { bytes, nodes }) + } +} + +impl CompactEncoding for HypercoreState { + fn preencode(&mut self, value: &DataUpgrade) -> Result { + self.0.preencode(&value.start)?; + self.0.preencode(&value.length)?; + self.preencode(&value.nodes)?; + self.preencode(&value.additional_nodes)?; + self.0.preencode(&value.signature) + } + + fn encode(&mut self, value: &DataUpgrade, buffer: &mut [u8]) -> Result { + self.0.encode(&value.start, buffer)?; + self.0.encode(&value.length, buffer)?; + self.encode(&value.nodes, buffer)?; + self.encode(&value.additional_nodes, buffer)?; + self.0.encode(&value.signature, buffer) + } + + fn decode(&mut self, buffer: &[u8]) -> Result { + let start: u64 = self.0.decode(buffer)?; + let length: u64 = self.0.decode(buffer)?; + let nodes: Vec = self.decode(buffer)?; + let additional_nodes: Vec = self.decode(buffer)?; + let signature: Vec = self.0.decode(buffer)?; + Ok(DataUpgrade { + start, + length, + nodes, + additional_nodes, + signature, + }) + } +} + +impl CompactEncoding for State { + fn preencode(&mut self, value: &Manifest) -> Result { + self.add_end(1)?; // Version + self.add_end(1)?; // hash in one byte + self.add_end(1)?; // type in one byte + self.preencode(&value.signer) + } + + fn encode(&mut self, value: &Manifest, buffer: &mut [u8]) -> Result { + self.set_byte_to_buffer(0, buffer)?; // Version + if &value.hash == "blake2b" { + self.set_byte_to_buffer(0, buffer)?; // Version + } else { + return Err(EncodingError::new( + EncodingErrorKind::InvalidData, + &format!("Unknown hash: {}", &value.hash), + )); + } + // Type. 
0: static, 1: signer, 2: multiple signers + self.set_byte_to_buffer(1, buffer)?; // Version + self.encode(&value.signer, buffer) + } + + fn decode(&mut self, buffer: &[u8]) -> Result { + let version: u8 = self.decode_u8(buffer)?; + if version != 0 { + panic!("Unknown manifest version {}", version); + } + let hash_id: u8 = self.decode_u8(buffer)?; + let hash: String = if hash_id != 0 { + return Err(EncodingError::new( + EncodingErrorKind::InvalidData, + &format!("Unknown hash id: {hash_id}"), + )); + } else { + "blake2b".to_string() + }; + + let manifest_type: u8 = self.decode_u8(buffer)?; + if manifest_type != 1 { + return Err(EncodingError::new( + EncodingErrorKind::InvalidData, + &format!("Unknown manifest type: {manifest_type}"), + )); + } + let signer: ManifestSigner = self.decode(buffer)?; + + Ok(Manifest { hash, signer }) + } +} + +impl CompactEncoding for State { + fn preencode(&mut self, _value: &ManifestSigner) -> Result { + self.add_end(1)?; // Signature + self.preencode_fixed_32()?; + self.preencode_fixed_32() + } + + fn encode( + &mut self, + value: &ManifestSigner, + buffer: &mut [u8], + ) -> Result { + if &value.signature == "ed25519" { + self.set_byte_to_buffer(0, buffer)?; + } else { + return Err(EncodingError::new( + EncodingErrorKind::InvalidData, + &format!("Unknown signature type: {}", &value.signature), + )); + } + self.encode_fixed_32(&value.namespace, buffer)?; + self.encode_fixed_32(&value.public_key, buffer) + } + + fn decode(&mut self, buffer: &[u8]) -> Result { + let signature_id: u8 = self.decode_u8(buffer)?; + let signature: String = if signature_id != 0 { + return Err(EncodingError::new( + EncodingErrorKind::InvalidData, + &format!("Unknown signature id: {signature_id}"), + )); + } else { + "ed25519".to_string() + }; + let namespace: [u8; 32] = + self.decode_fixed_32(buffer)? 
+ .to_vec() + .try_into() + .map_err(|_err| { + EncodingError::new( + EncodingErrorKind::InvalidData, + "Invalid namespace in manifest signer", + ) + })?; + let public_key: [u8; 32] = + self.decode_fixed_32(buffer)? + .to_vec() + .try_into() + .map_err(|_err| { + EncodingError::new( + EncodingErrorKind::InvalidData, + "Invalid public key in manifest signer", + ) + })?; + + Ok(ManifestSigner { + signature, + namespace, + public_key, + }) + } +} diff --git a/src/event.rs b/src/event.rs deleted file mode 100644 index 37d072b7..00000000 --- a/src/event.rs +++ /dev/null @@ -1,3 +0,0 @@ -/// Events emitted. -#[derive(Debug, Clone, PartialEq)] -pub enum Event {} diff --git a/src/feed.rs b/src/feed.rs deleted file mode 100644 index afa14876..00000000 --- a/src/feed.rs +++ /dev/null @@ -1,676 +0,0 @@ -//! Hypercore's main abstraction. Exposes an append-only, secure log structure. - -use crate::feed_builder::FeedBuilder; -use crate::replicate::{Message, Peer}; -pub use crate::storage::{Node, NodeTrait, Storage, Store}; - -use crate::audit::Audit; -use crate::bitfield::Bitfield; -use crate::crypto::{ - generate_keypair, sign, verify, Hash, Merkle, PublicKey, SecretKey, Signature, -}; -use crate::proof::Proof; -use anyhow::{bail, ensure, Result}; -use flat_tree as flat; -use pretty_hash::fmt as pretty_fmt; -use random_access_disk::RandomAccessDisk; -use random_access_memory::RandomAccessMemory; -use random_access_storage::RandomAccess; -use tree_index::TreeIndex; - -use std::borrow::Borrow; -use std::cmp; -use std::fmt::{self, Debug, Display}; -use std::ops::Range; -use std::path::Path; -use std::sync::Arc; - -/// Feed is an append-only log structure. -/// -/// To read an entry from a `Feed` you only need to know its [PublicKey], to write to a `Feed` -/// you must also have its [SecretKey]. The [SecretKey] should not be shared unless you know -/// what you're doing as only one client should be able to write to a single `Feed`. 
-/// If 2 seperate clients write conflicting information to the same `Feed` it will become corupted. -/// The feed needs an implementation of RandomAccess as a storage backing for the entrys added to it. -/// -/// There are several ways to construct a `Feed` -/// -/// __If you have a `Feed`'s [PublicKey], but have not opened a given `Feed` before__ -/// Use [builder] to initalize a new local `Feed` instance. You will not be able to write to this -/// feed. -/// -/// __If you want to create a new `Feed`__ -/// Use [with_storage] and `Feed` will create a new [SecretKey]/[PublicKey] keypair and store it -/// in the [Storage] -/// -/// __If you want to reopen a `Feed` you have previously opened__ -/// Use [with_storage], giving it a [Storage] that contains the previously opened `Feed` -/// -/// these references can be changed to the +nightly version, as docs.rs uses +nightly -/// -/// [SecretKey]: ed25519_dalek::SecretKey -/// [PublicKey]: ed25519_dalek::PublicKey -/// [RandomAccess]: random_access_storage::RandomAccess -/// [Storage]: crate::storage::Storage -/// [builder]: crate::feed_builder::FeedBuilder -/// [with_storage]: crate::feed::Feed::with_storage -#[derive(Debug)] -pub struct Feed -where - T: RandomAccess> + Debug, -{ - /// Merkle tree instance. - pub(crate) merkle: Merkle, - pub(crate) public_key: PublicKey, - pub(crate) secret_key: Option, - pub(crate) storage: Storage, - /// Total length of raw data stored in bytes. - pub(crate) byte_length: u64, - /// Total number of entries stored in the `Feed` - pub(crate) length: u64, - /// Bitfield to keep track of which data we own. - pub(crate) bitfield: Bitfield, - pub(crate) tree: TreeIndex, - pub(crate) peers: Vec, -} - -impl Feed -where - T: RandomAccess> + Debug + Send, -{ - /// Create a new instance with a custom storage backend. 
- pub async fn with_storage(mut storage: crate::storage::Storage) -> Result { - match storage.read_partial_keypair().await { - Some(partial_keypair) => { - let builder = FeedBuilder::new(partial_keypair.public, storage); - - // return early without secret key - if partial_keypair.secret.is_none() { - return Ok(builder.build().await?); - } - - builder - .secret_key( - partial_keypair - .secret - .ok_or_else(|| anyhow::anyhow!("secret-key not present"))?, - ) - .build() - .await - } - None => { - // we have no keys, generate a pair and save them to the storage - let keypair = generate_keypair(); - storage.write_public_key(&keypair.public).await?; - storage.write_secret_key(&keypair.secret).await?; - - FeedBuilder::new(keypair.public, storage) - .secret_key(keypair.secret) - .build() - .await - } - } - } - - /// Starts a `FeedBuilder` with the provided `PublicKey` and `Storage`. - pub fn builder(public_key: PublicKey, storage: Storage) -> FeedBuilder { - FeedBuilder::new(public_key, storage) - } - - /// Get the number of entries in the feed. - #[inline] - pub fn len(&self) -> u64 { - self.length - } - - /// Check if the length is 0. - #[inline] - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - /// Get the total amount of bytes stored in the feed. - #[inline] - pub fn byte_len(&self) -> u64 { - self.byte_length - } - - /// Append data into the log. /// - /// `append` will return an Err if this feed was not initalized with a [SecretKey]. - /// - /// It inserts the inputed data, it's signature, and a new [Merkle] node into [Storage]. 
- /// - /// [SecretKey]: ed25519_dalek::SecretKey - /// [Merkle]: crate::crypto::Merkle - /// [Storage]: crate::storage::Storage - #[inline] - pub async fn append(&mut self, data: &[u8]) -> Result<()> { - let key = match &self.secret_key { - Some(key) => key, - None => bail!("no secret key, cannot append."), - }; - self.merkle.next(data); - - self.storage - .write_data(self.byte_length as u64, &data) - .await?; - - let hash = Hash::from_roots(self.merkle.roots()); - let index = self.length; - let message = hash_with_length_as_bytes(hash, index + 1); - let signature = sign(&self.public_key, key, &message); - self.storage.put_signature(index, signature).await?; - - for node in self.merkle.nodes() { - self.storage.put_node(node).await?; - } - - self.byte_length += data.len() as u64; - - self.bitfield.set(index, true); - self.tree.set(tree_index(index)); - self.length += 1; - let bytes = self.bitfield.to_bytes(&self.tree)?; - self.storage.put_bitfield(0, &bytes).await?; - - Ok(()) - } - - /// Get the block of data at the tip of the feed. This will be the most - /// recently appended block. - #[inline] - pub async fn head(&mut self) -> Result>> { - match self.len() { - 0 => Ok(None), - len => self.get(len - 1).await, - } - } - - /// Return `true` if a data block is available locally. - #[inline] - pub fn has(&mut self, index: u64) -> bool { - self.bitfield.get(index) - } - - /// Return `true` if all data blocks within a range are available locally. - #[inline] - pub fn has_all(&mut self, range: ::std::ops::Range) -> bool { - let total = range.clone().count(); - total == self.bitfield.total_with_range(range) as usize - } - - /// Get the total amount of chunks downloaded. - #[inline] - pub fn downloaded(&mut self, range: ::std::ops::Range) -> u8 { - self.bitfield.total_with_range(range) - } - - /// Retrieve data from the log. 
- #[inline] - pub async fn get(&mut self, index: u64) -> Result>> { - if !self.bitfield.get(index) { - // NOTE: Do (network) lookup here once we have network code. - return Ok(None); - } - Ok(Some(self.storage.get_data(index).await?)) - } - - /// Return the Nodes which prove the correctness for the Node at index. - #[inline] - pub async fn proof(&mut self, index: u64, include_hash: bool) -> Result { - self.proof_with_digest(index, 0, include_hash).await - } - - /// Return the Nodes which prove the correctness for the Node at index with a - /// digest. - pub async fn proof_with_digest( - &mut self, - index: u64, - digest: u64, - include_hash: bool, - ) -> Result { - let mut remote_tree = TreeIndex::default(); - let mut nodes = vec![]; - - let proof = self.tree.proof_with_digest( - tree_index(index), - digest, - include_hash, - &mut nodes, - &mut remote_tree, - ); - - let proof = match proof { - Some(proof) => proof, - None => bail!("No proof available for index {}", index), - }; - - let tmp_num = proof.verified_by() / 2; - let (sig_index, has_underflow) = tmp_num.overflowing_sub(1); - let signature = if has_underflow { - None - } else { - match self.storage.get_signature(sig_index).await { - Ok(sig) => Some(sig), - Err(_) => None, - } - }; - - let mut nodes = Vec::with_capacity(proof.nodes().len()); - for index in proof.nodes() { - let node = self.storage.get_node(*index).await?; - nodes.push(node); - } - - Ok(Proof { - nodes, - signature, - index, - }) - } - - /// Compute the digest for the index. - pub fn digest(&mut self, index: u64) -> u64 { - self.tree.digest(tree_index(index)) - } - - /// Insert data into the tree at `index`. Verifies the `proof` when inserting - /// to make sure data is correct. Useful when replicating data from a remote - /// host. 
- pub async fn put(&mut self, index: u64, data: Option<&[u8]>, mut proof: Proof) -> Result<()> { - let mut next = tree_index(index); - let mut trusted: Option = None; - let mut missing = vec![]; - - let mut i = match data { - Some(_) => 0, - None => 1, - }; - - loop { - if self.tree.get(next) { - trusted = Some(next); - break; - } - let sibling = flat::sibling(next); - next = flat::parent(next); - if i < proof.nodes.len() && proof.nodes[i].index == sibling { - i += 1; - continue; - } - if !self.tree.get(sibling) { - break; - } - missing.push(sibling); - } - - if trusted.is_none() && self.tree.get(next) { - trusted = Some(next); - } - - let mut missing_nodes = vec![]; - for index in missing { - let node = self.storage.get_node(index).await?; - missing_nodes.push(node); - } - - let mut trusted_node = None; - if let Some(index) = trusted { - let node = self.storage.get_node(index).await?; - trusted_node = Some(node); - } - - let mut visited = vec![]; - let mut top = match data { - Some(data) => Node::new( - tree_index(index), - Hash::from_leaf(&data).as_bytes().to_owned(), - data.len() as u64, - ), - None => proof.nodes.remove(0), - }; - - // check if we already have the hash for this node - if verify_node(&trusted_node, &top) { - self.write(index, data, &visited, None).await?; - return Ok(()); - } - - // keep hashing with siblings until we reach the end or trusted node - loop { - let node; - let next = flat::sibling(top.index); - - if !proof.nodes.is_empty() && proof.nodes[0].index == next { - node = proof.nodes.remove(0); - visited.push(node.clone()); - } else if !missing_nodes.is_empty() && missing_nodes[0].index == next { - node = missing_nodes.remove(0); - } else { - // TODO: panics here - let nodes = self.verify_roots(&top, &mut proof).await?; - visited.extend_from_slice(&nodes); - self.write(index, data, &visited, proof.signature).await?; - return Ok(()); - } - - visited.push(top.clone()); - let hash = Hash::from_hashes(&top, &node); - let len = top.len() + 
node.len(); - top = Node::new(flat::parent(top.index), hash.as_bytes().into(), len); - - if verify_node(&trusted_node, &top) { - self.write(index, data, &visited, None).await?; - return Ok(()); - } - } - - fn verify_node(trusted: &Option, node: &Node) -> bool { - match trusted { - None => false, - Some(trusted) => trusted.index == node.index && trusted.hash == node.hash, - } - } - } - - /// Write some data to disk. Usually used in combination with `.put()`. - // in JS this calls to: - // - ._write() - // - ._onwrite() (emit the 'write' event), if it exists - // - ._writeAfterHook() (optionally going through writeHookdone()) - // - ._writeDone() - // - // Arguments are: (index, data, node, sig, from, cb) - async fn write( - &mut self, - index: u64, - data: Option<&[u8]>, - nodes: &[Node], - sig: Option, - ) -> Result<()> { - for node in nodes { - self.storage.put_node(node).await?; - } - - if let Some(data) = data { - self.storage.put_data(index, data, &nodes).await?; - } - - if let Some(sig) = sig { - let sig = sig.borrow(); - self.storage.put_signature(index, sig).await?; - } - - for node in nodes { - self.tree.set(node.index); - } - - self.tree.set(tree_index(index)); - - if let Some(_data) = data { - if self.bitfield.set(index, true).is_changed() { - // TODO: emit "download" event - } - // TODO: check peers.length, call ._announce if peers exist. - } - - // TODO: Discern between "primary" and "replica" streams. - // if (!this.writable) { - // if (!this._synced) this._synced = this.bitfield.iterator(0, this.length) - // if (this._synced.next() === -1) { - // this._synced.range(0, this.length) - // this._synced.seek(0) - // if (this._synced.next() === -1) { - // this.emit('sync') - // } - // } - // } - - Ok(()) - } - - /// Get a signature from the store. 
- pub async fn signature(&mut self, index: u64) -> Result { - ensure!( - index < self.length, - format!("No signature found for index {}", index) - ); - self.storage.next_signature(index).await - } - - /// Verify the entire feed. Checks a signature against the signature of all - /// root nodes combined. - pub async fn verify(&mut self, index: u64, signature: &Signature) -> Result<()> { - let roots = self.root_hashes(index).await?; - let roots: Vec<_> = roots.into_iter().map(Arc::new).collect(); - - let hash = Hash::from_roots(&roots); - let message = hash_with_length_as_bytes(hash, index + 1); - - verify_compat(&self.public_key, &message, Some(signature))?; - Ok(()) - } - - /// Announce we have a piece of data to all other peers. - // TODO: probably shouldn't be public - pub fn announce(&mut self, message: &Message, from: &Peer) { - for peer in &mut self.peers { - if peer != from { - peer.have(message) - } - } - } - - /// Announce we no longer have a piece of data to all other peers. - // TODO: probably shouldn't be public - pub fn unannounce(&mut self, message: &Message) { - for peer in &mut self.peers { - peer.unhave(message) - } - } - - /// Get all root hashes from the feed. - // In the JavaScript implementation this calls to `._getRootsToVerify()` - // internally. In Rust it seems better to just inline the code. - pub async fn root_hashes(&mut self, index: u64) -> Result> { - ensure!( - index <= self.length, - format!("Root index bounds exceeded {} > {}", index, self.length) - ); - let roots_index = tree_index(index) + 2; - let mut indexes = vec![]; - flat::full_roots(roots_index, &mut indexes); - - let mut roots = Vec::with_capacity(indexes.len()); - for index in indexes { - let node = self.storage.get_node(index).await?; - roots.push(node); - } - - Ok(roots) - } - - /// Access the public key. - pub fn public_key(&self) -> &PublicKey { - &self.public_key - } - - /// Access the secret key. 
- pub fn secret_key(&self) -> &Option { - &self.secret_key - } - - async fn verify_roots(&mut self, top: &Node, proof: &mut Proof) -> Result> { - let last_node = if !proof.nodes.is_empty() { - proof.nodes[proof.nodes.len() - 1].index - } else { - top.index - }; - - let verified_by = cmp::max(flat::right_span(top.index), flat::right_span(last_node)) + 2; - - let mut indexes = vec![]; - flat::full_roots(verified_by, &mut indexes); - let mut roots = Vec::with_capacity(indexes.len()); - let mut extra_nodes = vec![]; - - for index in indexes { - if index == top.index { - extra_nodes.push(top.clone()); - roots.push(top.clone()); // TODO: verify this is the right index to push to. - } else if !proof.nodes.is_empty() && index == proof.nodes[0].index { - extra_nodes.push(proof.nodes[0].clone()); - roots.push(proof.nodes.remove(0)); // TODO: verify this is the right index to push to. - } else if self.tree.get(index) { - let node = self.storage.get_node(index).await?; - roots.push(node); - } else { - bail!(": Missing tree roots needed for verify"); - } - } - - let checksum = Hash::from_roots(&roots); - let length = verified_by / 2; - let message = hash_with_length_as_bytes(checksum, length); - verify_compat(&self.public_key, &message, proof.signature())?; - - // Update the length if we grew the feed. - let len = verified_by / 2; - if len > self.len() { - self.length = len; - self.byte_length = roots.iter().fold(0, |acc, root| acc + root.index) - // TODO: emit('append') - } - - Ok(extra_nodes) - } - - /// Audit all data in the feed. Checks that all current data matches - /// the hashes in the merkle tree, and clears the bitfield if not. 
- /// The tuple returns is (valid_blocks, invalid_blocks) - pub async fn audit(&mut self) -> Result { - let mut valid_blocks = 0; - let mut invalid_blocks = 0; - for index in 0..self.length { - if self.bitfield.get(index) { - let node = self.storage.get_node(2 * index).await?; - let data = self.storage.get_data(index).await?; - let data_hash = Hash::from_leaf(&data); - if node.hash == data_hash.as_bytes() { - valid_blocks += 1; - } else { - invalid_blocks += 1; - self.bitfield.set(index, false); - } - } - } - Ok(Audit { - valid_blocks, - invalid_blocks, - }) - } - - /// Expose the bitfield attribute to use on during download - pub fn bitfield(&self) -> &Bitfield { - &self.bitfield - } - - /// (unimplemented) Provide a range of data to download. - pub fn download(&mut self, _range: Range) -> Result<()> { - unimplemented!(); - } - - /// (unimplemented) Provide a range of data to remove from the local storage. - pub fn undownload(&mut self, _range: Range) -> Result<()> { - unimplemented!(); - } - - /// (unimplemented) End the feed. - pub fn finalize(&mut self) -> Result<()> { - // if (!this.key) { - // this.key = crypto.tree(this._merkle.roots) - // this.discoveryKey = crypto.discoveryKey(this.key) - // } - // this._storage.key.write(0, this.key, cb) - unimplemented!(); - } - - /// Update all peers. - pub fn update_peers(&mut self) { - for peer in &mut self.peers { - peer.update(); - } - } -} - -impl Feed { - /// Create a new instance that persists to disk at the location of `dir`. - /// If dir was not there, it will be created. - // NOTE: Should we call these `data.bitfield` / `data.tree`? 
- pub async fn open>(path: P) -> Result { - if let Err(e) = std::fs::create_dir_all(&path) { - return Err(anyhow::Error::msg(format!( - "Failed to create directory {} because of: {}", - path.as_ref().display(), - e - ))); - } - - let dir = path.as_ref().to_owned(); - - let storage = Storage::new_disk(&dir, false).await?; - Self::with_storage(storage).await - } -} - -/// Create a new instance with an in-memory storage backend. -/// -/// ## Panics -/// Can panic if constructing the in-memory store fails, which is highly -/// unlikely. -impl Default for Feed { - fn default() -> Self { - async_std::task::block_on(async { - let storage = Storage::new_memory().await.unwrap(); - Self::with_storage(storage).await.unwrap() - }) - } -} - -impl> + Debug + Send> Display - for Feed -{ - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - // TODO: yay, we should find a way to convert this .unwrap() to an error - // type that's accepted by `fmt::Result<(), fmt::Error>`. - let key = pretty_fmt(&self.public_key.to_bytes()).unwrap(); - let byte_len = self.byte_len(); - let len = self.len(); - let peers = 0; // TODO: update once we actually have peers. - write!( - f, - "Hypercore(key=[{}], length={}, byte_length={}, peers={})", - key, len, byte_len, peers - ) - } -} - -/// Convert the index to the index in the tree. -#[inline] -fn tree_index(index: u64) -> u64 { - 2 * index -} - -/// Extend a hash with a big-endian encoded length. -fn hash_with_length_as_bytes(hash: Hash, length: u64) -> Vec { - [hash.as_bytes(), &length.to_be_bytes()].concat().to_vec() -} - -/// Verify a signature. If it fails, remove the length suffix added in Hypercore v9 -/// and verify again (backwards compatibility, remove in later version). 
-pub fn verify_compat(public: &PublicKey, msg: &[u8], sig: Option<&Signature>) -> Result<()> { - match verify(public, msg, sig) { - Ok(_) => Ok(()), - Err(_) => verify(public, &msg[0..32], sig), - } -} diff --git a/src/feed_builder.rs b/src/feed_builder.rs deleted file mode 100644 index e4ac5419..00000000 --- a/src/feed_builder.rs +++ /dev/null @@ -1,89 +0,0 @@ -use ed25519_dalek::{PublicKey, SecretKey}; - -use crate::bitfield::Bitfield; -use crate::crypto::Merkle; -use crate::storage::Storage; -use random_access_storage::RandomAccess; -use std::fmt::Debug; -use tree_index::TreeIndex; - -use crate::Feed; -use anyhow::Result; - -/// Construct a new `Feed` instance. -// TODO: make this an actual builder pattern. -// https://deterministic.space/elegant-apis-in-rust.html#builder-pattern -#[derive(Debug)] -pub struct FeedBuilder -where - T: RandomAccess + Debug, -{ - storage: Storage, - public_key: PublicKey, - secret_key: Option, -} - -impl FeedBuilder -where - T: RandomAccess> + Debug + Send, -{ - /// Create a new instance. - #[inline] - pub fn new(public_key: PublicKey, storage: Storage) -> Self { - Self { - storage, - public_key, - secret_key: None, - } - } - - /// Set the secret key. - pub fn secret_key(mut self, secret_key: SecretKey) -> Self { - self.secret_key = Some(secret_key); - self - } - - /// Finalize the builder. 
- #[inline] - pub async fn build(mut self) -> Result> { - let (bitfield, tree) = if let Ok(bitfield) = self.storage.read_bitfield().await { - Bitfield::from_slice(&bitfield) - } else { - Bitfield::new() - }; - use crate::storage::Node; - - let mut tree = TreeIndex::new(tree); - let mut roots = vec![]; - flat_tree::full_roots(tree.blocks() * 2, &mut roots); - let mut result: Vec> = vec![None; roots.len()]; - - for i in 0..roots.len() { - let node = self.storage.get_node(roots[i] as u64).await?; - let idx = roots - .iter() - .position(|&x| x == node.index) - .ok_or_else(|| anyhow::anyhow!("Couldnt find idx of node"))?; - result[idx] = Some(node); - } - - let roots = result - .into_iter() - .collect::>>() - .ok_or_else(|| anyhow::anyhow!("Roots contains undefined nodes"))?; - - let byte_length = roots.iter().fold(0, |acc, node| acc + node.length); - - Ok(Feed { - merkle: Merkle::from_nodes(roots), - byte_length, - length: tree.blocks(), - bitfield, - tree, - public_key: self.public_key, - secret_key: self.secret_key, - storage: self.storage, - peers: vec![], - }) - } -} diff --git a/src/lib.rs b/src/lib.rs index 0c99939c..a403e381 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,60 +2,100 @@ #![forbid(rust_2018_idioms, rust_2018_compatibility)] #![forbid(missing_debug_implementations)] #![forbid(missing_docs)] +#![warn(unreachable_pub)] #![cfg_attr(test, deny(warnings))] +#![doc(test(attr(deny(warnings))))] //! ## Introduction +//! //! Hypercore is a secure, distributed append-only log. Built for sharing //! large datasets and streams of real time data as part of the [Dat] project. -//! This is a rust port of [the original node version][dat-node] -//! aiming for interoperability. The primary way to use this crate is through the [Feed] struct. +//! This is a rust port of [the original Javascript version][holepunch-hypercore] +//! aiming for interoperability with LTS version. The primary way to use this +//! 
crate is through the [Hypercore] struct, which can be created using the +//! [HypercoreBuilder]. +//! +//! This crate supports WASM with `cargo build --target=wasm32-unknown-unknown`. +//! +//! ## Features +//! +//! ### `sparse` (default) +//! +//! When using disk storage, clearing values may create sparse files. On by default. +//! +//! ### `async-std` (default) +//! +//! Use the async-std runtime, on by default. Either this or `tokio` is mandatory. +//! +//! ### `tokio` +//! +//! Use the tokio runtime. Either this or `async-std` is mandatory. +//! +//! ### `cache` +//! +//! Use a moka cache for merkle tree nodes to speed up reading. //! //! ## Example //! ```rust -//! # fn main() -> Result<(), Box> { +//! # #[cfg(feature = "tokio")] +//! # tokio_test::block_on(async { +//! # example().await; +//! # }); +//! # #[cfg(feature = "async-std")] //! # async_std::task::block_on(async { -//! let mut feed = hypercore::open("./feed.db").await?; +//! # example().await; +//! # }); +//! # async fn example() { +//! use hypercore::{HypercoreBuilder, Storage}; +//! +//! // Create an in-memory hypercore using a builder +//! let mut hypercore = HypercoreBuilder::new(Storage::new_memory().await.unwrap()) +//! .build() +//! .await +//! .unwrap(); //! -//! feed.append(b"hello").await?; -//! feed.append(b"world").await?; +//! // Append entries to the log +//! hypercore.append(b"Hello, ").await.unwrap(); +//! hypercore.append(b"world!").await.unwrap(); //! -//! assert_eq!(feed.get(0).await?, Some(b"hello".to_vec())); -//! assert_eq!(feed.get(1).await?, Some(b"world".to_vec())); -//! # Ok(()) -//! # }) +//! // Read entries from the log +//! assert_eq!(hypercore.get(0).await.unwrap().unwrap(), b"Hello, "); +//! assert_eq!(hypercore.get(1).await.unwrap().unwrap(), b"world!"); //! # } //! ``` //! -//! [dat-node]: https://github.com/mafintosh/hypercore +//! Find more examples in the [examples] folder. +//! //! [Dat]: https://github.com/datrs -//! [Feed]: crate::feed::Feed +//! 
[holepunch-hypercore]: https://github.com/holepunchto/hypercore +//! [Hypercore]: crate::core::Hypercore +//! [HypercoreBuilder]: crate::builder::HypercoreBuilder +//! [examples]: https://github.com/datrs/hypercore/tree/master/examples -pub mod bitfield; +pub mod encoding; pub mod prelude; -mod audit; +mod bitfield; +mod builder; +mod common; +mod core; mod crypto; -mod event; -mod feed; -mod feed_builder; -mod proof; -mod replicate; +mod data; +mod oplog; mod storage; +mod tree; -pub use crate::crypto::{generate_keypair, sign, verify, Signature}; -pub use crate::event::Event; -pub use crate::feed::Feed; -pub use crate::feed_builder::FeedBuilder; -pub use crate::proof::Proof; -pub use crate::replicate::Peer; -pub use crate::storage::{Node, NodeTrait, Storage, Store}; -pub use ed25519_dalek::{PublicKey, SecretKey}; - -use std::path::Path; - -/// Create a new Hypercore `Feed`. -pub async fn open>( - path: P, -) -> anyhow::Result> { - Feed::open(path).await -} +#[cfg(feature = "cache")] +pub use crate::builder::CacheOptionsBuilder; +pub use crate::builder::HypercoreBuilder; +pub use crate::common::{ + DataBlock, DataHash, DataSeek, DataUpgrade, HypercoreError, Node, Proof, RequestBlock, + RequestSeek, RequestUpgrade, Store, +}; +pub use crate::core::{AppendOutcome, Hypercore, Info}; +pub use crate::crypto::{generate_signing_key, sign, verify, PartialKeypair}; +pub use crate::storage::Storage; +pub use ed25519_dalek::{ + SecretKey, Signature, SigningKey, VerifyingKey, KEYPAIR_LENGTH, PUBLIC_KEY_LENGTH, + SECRET_KEY_LENGTH, +}; diff --git a/src/oplog/entry.rs b/src/oplog/entry.rs new file mode 100644 index 00000000..62e4299b --- /dev/null +++ b/src/oplog/entry.rs @@ -0,0 +1,164 @@ +use crate::encoding::{CompactEncoding, EncodingError, HypercoreState}; +use crate::{common::BitfieldUpdate, Node}; + +/// Entry tree upgrade +#[derive(Debug)] +pub(crate) struct EntryTreeUpgrade { + pub(crate) fork: u64, + pub(crate) ancestors: u64, + pub(crate) length: u64, + pub(crate) 
signature: Box<[u8]>, +} + +impl CompactEncoding for HypercoreState { + fn preencode(&mut self, value: &EntryTreeUpgrade) -> Result { + self.0.preencode(&value.fork)?; + self.0.preencode(&value.ancestors)?; + self.0.preencode(&value.length)?; + self.0.preencode(&value.signature) + } + + fn encode( + &mut self, + value: &EntryTreeUpgrade, + buffer: &mut [u8], + ) -> Result { + self.0.encode(&value.fork, buffer)?; + self.0.encode(&value.ancestors, buffer)?; + self.0.encode(&value.length, buffer)?; + self.0.encode(&value.signature, buffer) + } + + fn decode(&mut self, buffer: &[u8]) -> Result { + let fork: u64 = self.0.decode(buffer)?; + let ancestors: u64 = self.0.decode(buffer)?; + let length: u64 = self.0.decode(buffer)?; + let signature: Box<[u8]> = self.0.decode(buffer)?; + Ok(EntryTreeUpgrade { + fork, + ancestors, + length, + signature, + }) + } +} + +impl CompactEncoding for HypercoreState { + fn preencode(&mut self, value: &BitfieldUpdate) -> Result { + self.0.add_end(1)?; + self.0.preencode(&value.start)?; + self.0.preencode(&value.length) + } + + fn encode( + &mut self, + value: &BitfieldUpdate, + buffer: &mut [u8], + ) -> Result { + let flags: u8 = if value.drop { 1 } else { 0 }; + self.0.set_byte_to_buffer(flags, buffer)?; + self.0.encode(&value.start, buffer)?; + self.0.encode(&value.length, buffer) + } + + fn decode(&mut self, buffer: &[u8]) -> Result { + let flags = self.0.decode_u8(buffer)?; + let start: u64 = self.0.decode(buffer)?; + let length: u64 = self.0.decode(buffer)?; + Ok(BitfieldUpdate { + drop: flags == 1, + start, + length, + }) + } +} + +/// Oplog Entry +#[derive(Debug)] +pub struct Entry { + // TODO: This is a keyValueArray in JS + pub(crate) user_data: Vec, + pub(crate) tree_nodes: Vec, + pub(crate) tree_upgrade: Option, + pub(crate) bitfield: Option, +} + +impl CompactEncoding for HypercoreState { + fn preencode(&mut self, value: &Entry) -> Result { + self.0.add_end(1)?; // flags + if !value.user_data.is_empty() { + 
self.0.preencode(&value.user_data)?; + } + if !value.tree_nodes.is_empty() { + self.preencode(&value.tree_nodes)?; + } + if let Some(tree_upgrade) = &value.tree_upgrade { + self.preencode(tree_upgrade)?; + } + if let Some(bitfield) = &value.bitfield { + self.preencode(bitfield)?; + } + Ok(self.end()) + } + + fn encode(&mut self, value: &Entry, buffer: &mut [u8]) -> Result { + let start = self.0.start(); + self.0.add_start(1)?; + let mut flags: u8 = 0; + if !value.user_data.is_empty() { + flags |= 1; + self.0.encode(&value.user_data, buffer)?; + } + if !value.tree_nodes.is_empty() { + flags |= 2; + self.encode(&value.tree_nodes, buffer)?; + } + if let Some(tree_upgrade) = &value.tree_upgrade { + flags |= 4; + self.encode(tree_upgrade, buffer)?; + } + if let Some(bitfield) = &value.bitfield { + flags |= 8; + self.encode(bitfield, buffer)?; + } + + buffer[start] = flags; + Ok(self.0.start()) + } + + fn decode(&mut self, buffer: &[u8]) -> Result { + let flags = self.0.decode_u8(buffer)?; + let user_data: Vec = if flags & 1 != 0 { + self.0.decode(buffer)? + } else { + vec![] + }; + + let tree_nodes: Vec = if flags & 2 != 0 { + self.decode(buffer)? 
+ } else { + vec![] + }; + + let tree_upgrade: Option = if flags & 4 != 0 { + let value: EntryTreeUpgrade = self.decode(buffer)?; + Some(value) + } else { + None + }; + + let bitfield: Option = if flags & 8 != 0 { + let value: BitfieldUpdate = self.decode(buffer)?; + Some(value) + } else { + None + }; + + Ok(Entry { + user_data, + tree_nodes, + tree_upgrade, + bitfield, + }) + } +} diff --git a/src/oplog/header.rs b/src/oplog/header.rs new file mode 100644 index 00000000..aa27dcec --- /dev/null +++ b/src/oplog/header.rs @@ -0,0 +1,325 @@ +use compact_encoding::EncodingErrorKind; +use compact_encoding::{CompactEncoding, EncodingError, State}; +use ed25519_dalek::{SigningKey, PUBLIC_KEY_LENGTH, SECRET_KEY_LENGTH}; +use std::convert::TryInto; + +use crate::crypto::default_signer_manifest; +use crate::crypto::Manifest; +use crate::PartialKeypair; +use crate::VerifyingKey; + +/// Oplog header. +#[derive(Debug, Clone)] +pub(crate) struct Header { + // TODO: v11 has external + // pub(crate) external: Option, + // NB: This is the manifest hash in v11, right now + // just the public key, + pub(crate) key: [u8; 32], + pub(crate) manifest: Manifest, + pub(crate) key_pair: PartialKeypair, + // TODO: This is a keyValueArray in JS + pub(crate) user_data: Vec, + pub(crate) tree: HeaderTree, + pub(crate) hints: HeaderHints, +} + +impl Header { + /// Creates a new Header from given key pair + pub(crate) fn new(key_pair: PartialKeypair) -> Self { + let key = key_pair.public.to_bytes(); + let manifest = default_signer_manifest(key); + Self { + key, + manifest, + key_pair, + user_data: vec![], + tree: HeaderTree::new(), + hints: HeaderHints { + reorgs: vec![], + contiguous_length: 0, + }, + } + // Javascript side, initial header + // header = { + // external: null, + // key: opts.key || (compat ? 
manifest.signer.publicKey : manifestHash(manifest)), + // manifest, + // keyPair, + // userData: [], + // tree: { + // fork: 0, + // length: 0, + // rootHash: null, + // signature: null + // }, + // hints: { + // reorgs: [], + // contiguousLength: 0 + // } + // } + } +} + +/// Oplog header tree +#[derive(Debug, PartialEq, Clone)] +pub(crate) struct HeaderTree { + pub(crate) fork: u64, + pub(crate) length: u64, + pub(crate) root_hash: Box<[u8]>, + pub(crate) signature: Box<[u8]>, +} + +impl HeaderTree { + pub(crate) fn new() -> Self { + Self { + fork: 0, + length: 0, + root_hash: Box::new([]), + signature: Box::new([]), + } + } +} + +impl CompactEncoding for State { + fn preencode(&mut self, value: &HeaderTree) -> Result { + self.preencode(&value.fork)?; + self.preencode(&value.length)?; + self.preencode(&value.root_hash)?; + self.preencode(&value.signature) + } + + fn encode(&mut self, value: &HeaderTree, buffer: &mut [u8]) -> Result { + self.encode(&value.fork, buffer)?; + self.encode(&value.length, buffer)?; + self.encode(&value.root_hash, buffer)?; + self.encode(&value.signature, buffer) + } + + fn decode(&mut self, buffer: &[u8]) -> Result { + let fork: u64 = self.decode(buffer)?; + let length: u64 = self.decode(buffer)?; + let root_hash: Box<[u8]> = self.decode(buffer)?; + let signature: Box<[u8]> = self.decode(buffer)?; + Ok(HeaderTree { + fork, + length, + root_hash, + signature, + }) + } +} + +/// NB: In Javascript's sodium the secret key contains in itself also the public key, so to +/// maintain binary compatibility, we store the public key in the oplog now twice. 
+impl CompactEncoding for State { + fn preencode(&mut self, value: &PartialKeypair) -> Result { + self.add_end(1 + PUBLIC_KEY_LENGTH)?; + match &value.secret { + Some(_) => { + // Also add room for the public key + self.add_end(1 + SECRET_KEY_LENGTH + PUBLIC_KEY_LENGTH) + } + None => self.add_end(1), + } + } + + fn encode( + &mut self, + value: &PartialKeypair, + buffer: &mut [u8], + ) -> Result { + let public_key_bytes: Box<[u8]> = value.public.as_bytes().to_vec().into_boxed_slice(); + self.encode(&public_key_bytes, buffer)?; + match &value.secret { + Some(secret_key) => { + let mut secret_key_bytes: Vec = + Vec::with_capacity(SECRET_KEY_LENGTH + PUBLIC_KEY_LENGTH); + secret_key_bytes.extend_from_slice(&secret_key.to_bytes()); + secret_key_bytes.extend_from_slice(&public_key_bytes); + let secret_key_bytes: Box<[u8]> = secret_key_bytes.into_boxed_slice(); + self.encode(&secret_key_bytes, buffer) + } + None => self.set_byte_to_buffer(0, buffer), + } + } + + fn decode(&mut self, buffer: &[u8]) -> Result { + let public_key_bytes: Box<[u8]> = self.decode(buffer)?; + let public_key_bytes: [u8; PUBLIC_KEY_LENGTH] = + public_key_bytes[0..PUBLIC_KEY_LENGTH].try_into().unwrap(); + let secret_key_bytes: Box<[u8]> = self.decode(buffer)?; + let secret: Option = if secret_key_bytes.is_empty() { + None + } else { + let secret_key_bytes: [u8; SECRET_KEY_LENGTH] = + secret_key_bytes[0..SECRET_KEY_LENGTH].try_into().unwrap(); + Some(SigningKey::from_bytes(&secret_key_bytes)) + }; + + Ok(PartialKeypair { + public: VerifyingKey::from_bytes(&public_key_bytes).unwrap(), + secret, + }) + } +} + +/// Oplog header hints +#[derive(Debug, Clone)] +pub(crate) struct HeaderHints { + pub(crate) reorgs: Vec, + pub(crate) contiguous_length: u64, +} + +impl CompactEncoding for State { + fn preencode(&mut self, value: &HeaderHints) -> Result { + self.preencode(&value.reorgs)?; + self.preencode(&value.contiguous_length) + } + + fn encode(&mut self, value: &HeaderHints, buffer: &mut [u8]) -> Result 
{ + self.encode(&value.reorgs, buffer)?; + self.encode(&value.contiguous_length, buffer) + } + + fn decode(&mut self, buffer: &[u8]) -> Result { + Ok(HeaderHints { + reorgs: self.decode(buffer)?, + contiguous_length: self.decode(buffer)?, + }) + } +} + +impl CompactEncoding
for State { + fn preencode(&mut self, value: &Header) -> Result { + self.add_end(1)?; // Version + self.add_end(1)?; // Flags + self.preencode_fixed_32()?; // key + self.preencode(&value.manifest)?; + self.preencode(&value.key_pair)?; + self.preencode(&value.user_data)?; + self.preencode(&value.tree)?; + self.preencode(&value.hints) + } + + fn encode(&mut self, value: &Header, buffer: &mut [u8]) -> Result { + self.set_byte_to_buffer(1, buffer)?; // Version + let flags: u8 = 2 | 4; // Manifest and key pair, TODO: external=1 + self.set_byte_to_buffer(flags, buffer)?; + self.encode_fixed_32(&value.key, buffer)?; + self.encode(&value.manifest, buffer)?; + self.encode(&value.key_pair, buffer)?; + self.encode(&value.user_data, buffer)?; + self.encode(&value.tree, buffer)?; + self.encode(&value.hints, buffer) + } + + fn decode(&mut self, buffer: &[u8]) -> Result { + let version: u8 = self.decode_u8(buffer)?; + if version != 1 { + panic!("Unknown oplog version {}", version); + } + let _flags: u8 = self.decode_u8(buffer)?; + let key: [u8; 32] = self + .decode_fixed_32(buffer)? 
+ .to_vec() + .try_into() + .map_err(|_err| { + EncodingError::new( + EncodingErrorKind::InvalidData, + "Invalid key in oplog header", + ) + })?; + let manifest: Manifest = self.decode(buffer)?; + let key_pair: PartialKeypair = self.decode(buffer)?; + let user_data: Vec = self.decode(buffer)?; + let tree: HeaderTree = self.decode(buffer)?; + let hints: HeaderHints = self.decode(buffer)?; + + Ok(Header { + key, + manifest, + key_pair, + user_data, + tree, + hints, + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use crate::crypto::generate_signing_key; + + #[test] + fn encode_partial_key_pair() -> Result<(), EncodingError> { + let mut enc_state = State::new(); + let signing_key = generate_signing_key(); + let key_pair = PartialKeypair { + public: signing_key.verifying_key(), + secret: Some(signing_key), + }; + enc_state.preencode(&key_pair)?; + let mut buffer = enc_state.create_buffer(); + // Pub key: 1 byte for length, 32 bytes for content + // Sec key: 1 byte for length, 64 bytes for data + let expected_len = 1 + 32 + 1 + 64; + assert_eq!(buffer.len(), expected_len); + assert_eq!(enc_state.end(), expected_len); + assert_eq!(enc_state.start(), 0); + enc_state.encode(&key_pair, &mut buffer)?; + let mut dec_state = State::from_buffer(&buffer); + let key_pair_ret: PartialKeypair = dec_state.decode(&buffer)?; + assert_eq!(key_pair.public, key_pair_ret.public); + assert_eq!( + key_pair.secret.unwrap().to_bytes(), + key_pair_ret.secret.unwrap().to_bytes() + ); + Ok(()) + } + + #[test] + fn encode_tree() -> Result<(), EncodingError> { + let mut enc_state = State::new(); + let tree = HeaderTree::new(); + enc_state.preencode(&tree)?; + let mut buffer = enc_state.create_buffer(); + enc_state.encode(&tree, &mut buffer)?; + let mut dec_state = State::from_buffer(&buffer); + let tree_ret: HeaderTree = dec_state.decode(&buffer)?; + assert_eq!(tree, tree_ret); + Ok(()) + } + + #[test] + fn encode_header() -> Result<(), EncodingError> { + let mut enc_state = 
State::new(); + let signing_key = generate_signing_key(); + let signing_key = PartialKeypair { + public: signing_key.verifying_key(), + secret: Some(signing_key), + }; + let header = Header::new(signing_key); + enc_state.preencode(&header)?; + let mut buffer = enc_state.create_buffer(); + enc_state.encode(&header, &mut buffer)?; + let mut dec_state = State::from_buffer(&buffer); + let header_ret: Header = dec_state.decode(&buffer)?; + assert_eq!(header.key_pair.public, header_ret.key_pair.public); + assert_eq!(header.tree.fork, header_ret.tree.fork); + assert_eq!(header.tree.length, header_ret.tree.length); + assert_eq!(header.tree, header_ret.tree); + assert_eq!(header.manifest.hash, header_ret.manifest.hash); + assert_eq!( + header.manifest.signer.public_key, + header_ret.manifest.signer.public_key + ); + assert_eq!( + header.manifest.signer.signature, + header_ret.manifest.signer.signature + ); + Ok(()) + } +} diff --git a/src/oplog/mod.rs b/src/oplog/mod.rs new file mode 100644 index 00000000..6c720201 --- /dev/null +++ b/src/oplog/mod.rs @@ -0,0 +1,495 @@ +use futures::future::Either; +use std::convert::{TryFrom, TryInto}; + +use crate::common::{BitfieldUpdate, Store, StoreInfo, StoreInfoInstruction}; +use crate::encoding::{CompactEncoding, HypercoreState}; +use crate::tree::MerkleTreeChangeset; +use crate::{HypercoreError, Node, PartialKeypair}; + +mod entry; +mod header; + +pub(crate) use entry::{Entry, EntryTreeUpgrade}; +pub(crate) use header::{Header, HeaderTree}; + +pub(crate) const MAX_OPLOG_ENTRIES_BYTE_SIZE: u64 = 65536; +const HEADER_SIZE: usize = 4096; + +/// Oplog. +/// +/// There are two memory areas for a `Header` in `RandomAccessStorage`: one is the current +/// and one is the older. Which one is used depends on the value stored in the eighth byte's +/// eighth bit of the stored headers. 
+#[derive(Debug)] +pub(crate) struct Oplog { + header_bits: [bool; 2], + pub(crate) entries_length: u64, + pub(crate) entries_byte_length: u64, +} + +/// Oplog create header outcome +#[derive(Debug)] +pub(crate) struct OplogCreateHeaderOutcome { + pub(crate) header: Header, + pub(crate) infos_to_flush: Box<[StoreInfo]>, +} + +/// Oplog open outcome +#[derive(Debug)] +pub(crate) struct OplogOpenOutcome { + pub(crate) oplog: Oplog, + pub(crate) header: Header, + pub(crate) infos_to_flush: Box<[StoreInfo]>, + pub(crate) entries: Option>, +} + +impl OplogOpenOutcome { + pub(crate) fn new(oplog: Oplog, header: Header, infos_to_flush: Box<[StoreInfo]>) -> Self { + Self { + oplog, + header, + infos_to_flush, + entries: None, + } + } + pub(crate) fn from_create_header_outcome( + oplog: Oplog, + create_header_outcome: OplogCreateHeaderOutcome, + ) -> Self { + Self { + oplog, + header: create_header_outcome.header, + infos_to_flush: create_header_outcome.infos_to_flush, + entries: None, + } + } +} + +#[repr(usize)] +enum OplogSlot { + FirstHeader = 0, + SecondHeader = HEADER_SIZE, + Entries = HEADER_SIZE * 2, +} + +#[derive(Debug)] +struct ValidateLeaderOutcome { + state: HypercoreState, + header_bit: bool, + partial_bit: bool, +} + +// The first set of bits is [1, 0], see `get_next_header_oplog_slot_and_bit_value` for how +// they change. +const INITIAL_HEADER_BITS: [bool; 2] = [true, false]; + +impl Oplog { + /// Opens an existing Oplog from existing byte buffer or creates a new one. 
+ pub(crate) fn open( + key_pair: &Option, + info: Option, + ) -> Result, HypercoreError> { + match info { + None => Ok(Either::Left(StoreInfoInstruction::new_all_content( + Store::Oplog, + ))), + Some(info) => { + let existing = info.data.expect("Could not get data of existing oplog"); + // First read and validate both headers stored in the existing oplog + let h1_outcome = Self::validate_leader(OplogSlot::FirstHeader as usize, &existing)?; + let h2_outcome = + Self::validate_leader(OplogSlot::SecondHeader as usize, &existing)?; + + // Depending on what is stored, the state needs to be set accordingly. + // See `get_next_header_oplog_slot_and_bit_value` for details on header_bits. + let mut outcome: OplogOpenOutcome = if let Some(mut h1_outcome) = h1_outcome { + let (header, header_bits): (Header, [bool; 2]) = + if let Some(mut h2_outcome) = h2_outcome { + let header_bits = [h1_outcome.header_bit, h2_outcome.header_bit]; + let header: Header = if header_bits[0] == header_bits[1] { + (*h1_outcome.state).decode(&existing)? + } else { + (*h2_outcome.state).decode(&existing)? + }; + (header, header_bits) + } else { + ( + (*h1_outcome.state).decode(&existing)?, + [h1_outcome.header_bit, h1_outcome.header_bit], + ) + }; + let oplog = Oplog { + header_bits, + entries_length: 0, + entries_byte_length: 0, + }; + OplogOpenOutcome::new(oplog, header, Box::new([])) + } else if let Some(mut h2_outcome) = h2_outcome { + // This shouldn't happen because the first header is saved to the first slot + // but Javascript supports this so we should too. + let header_bits: [bool; 2] = [!h2_outcome.header_bit, h2_outcome.header_bit]; + let oplog = Oplog { + header_bits, + entries_length: 0, + entries_byte_length: 0, + }; + OplogOpenOutcome::new( + oplog, + (*h2_outcome.state).decode(&existing)?, + Box::new([]), + ) + } else if let Some(key_pair) = key_pair { + // There is nothing in the oplog, start from fresh given key pair. + Self::fresh(key_pair.clone())? 
+ } else { + // The storage is empty and no key pair given, erroring + return Err(HypercoreError::EmptyStorage { + store: Store::Oplog, + }); + }; + + // Read entries that might be stored in the existing content + if existing.len() > OplogSlot::Entries as usize { + let mut entry_offset = OplogSlot::Entries as usize; + let mut entries: Vec = Vec::new(); + let mut partials: Vec = Vec::new(); + while let Some(mut entry_outcome) = + Self::validate_leader(entry_offset, &existing)? + { + let entry: Entry = entry_outcome.state.decode(&existing)?; + entries.push(entry); + partials.push(entry_outcome.partial_bit); + entry_offset = (*entry_outcome.state).end(); + } + + // Remove all trailing partial entries + while partials.pop() == Some(true) { + entries.pop(); + } + outcome.entries = Some(entries.into_boxed_slice()); + } + Ok(Either::Right(outcome)) + } + } + } + + /// Appends an upgraded changeset to the Oplog. + pub(crate) fn append_changeset( + &mut self, + changeset: &MerkleTreeChangeset, + bitfield_update: Option, + atomic: bool, + header: &Header, + ) -> Result { + let mut header: Header = header.clone(); + let entry = self.update_header_with_changeset(changeset, bitfield_update, &mut header)?; + + Ok(OplogCreateHeaderOutcome { + header, + infos_to_flush: self.append_entries(&[entry], atomic)?, + }) + } + + pub(crate) fn update_header_with_changeset( + &mut self, + changeset: &MerkleTreeChangeset, + bitfield_update: Option, + header: &mut Header, + ) -> Result { + let tree_nodes: Vec = changeset.nodes.clone(); + let entry: Entry = if changeset.upgraded { + let hash = changeset + .hash + .as_ref() + .expect("Upgraded changeset must have a hash before appended"); + let signature = changeset + .signature + .expect("Upgraded changeset must be signed before appended"); + let signature: Box<[u8]> = signature.to_bytes().into(); + header.tree.root_hash = hash.clone(); + header.tree.signature = signature.clone(); + header.tree.length = 
changeset.length; + + Entry { + user_data: vec![], + tree_nodes, + tree_upgrade: Some(EntryTreeUpgrade { + fork: changeset.fork, + ancestors: changeset.ancestors, + length: changeset.length, + signature, + }), + bitfield: bitfield_update, + } + } else { + Entry { + user_data: vec![], + tree_nodes, + tree_upgrade: None, + bitfield: bitfield_update, + } + }; + Ok(entry) + } + + /// Clears a segment, returns infos to write to storage. + pub(crate) fn clear( + &mut self, + start: u64, + end: u64, + ) -> Result, HypercoreError> { + let entry: Entry = Entry { + user_data: vec![], + tree_nodes: vec![], + tree_upgrade: None, + bitfield: Some(BitfieldUpdate { + drop: true, + start, + length: end - start, + }), + }; + self.append_entries(&[entry], false) + } + + /// Flushes pending changes, returns infos to write to storage. + pub(crate) fn flush( + &mut self, + header: &Header, + clear_traces: bool, + ) -> Result, HypercoreError> { + let (new_header_bits, infos_to_flush) = if clear_traces { + // When clearing traces, both slots need to be cleared, hence + // do this twice, but for the first time, ignore the truncate + // store info, to end up with three StoreInfos. + let (new_header_bits, infos_to_flush) = + Self::insert_header(header, 0, self.header_bits, clear_traces)?; + let mut combined_infos_to_flush: Vec = + infos_to_flush.into_vec().drain(0..1).into_iter().collect(); + let (new_header_bits, infos_to_flush) = + Self::insert_header(header, 0, new_header_bits, clear_traces)?; + combined_infos_to_flush.extend(infos_to_flush.into_vec()); + (new_header_bits, combined_infos_to_flush.into_boxed_slice()) + } else { + Self::insert_header(header, 0, self.header_bits, clear_traces)? + }; + self.entries_byte_length = 0; + self.entries_length = 0; + self.header_bits = new_header_bits; + Ok(infos_to_flush) + } + + /// Appends a batch of entries to the Oplog. 
+ fn append_entries( + &mut self, + batch: &[Entry], + atomic: bool, + ) -> Result, HypercoreError> { + let len = batch.len(); + let header_bit = self.get_current_header_bit(); + // Leave room for leaders + let mut state = HypercoreState::new_with_start_and_end(0, len * 8); + + for entry in batch.iter() { + state.preencode(entry)?; + } + + let mut buffer = state.create_buffer(); + for (i, entry) in batch.iter().enumerate() { + (*state).add_start(8)?; + let start = state.start(); + let partial_bit: bool = atomic && i < len - 1; + state.encode(entry, &mut buffer)?; + Self::prepend_leader( + state.start() - start, + header_bit, + partial_bit, + &mut state, + &mut buffer, + )?; + } + + let index = OplogSlot::Entries as u64 + self.entries_byte_length; + self.entries_length += len as u64; + self.entries_byte_length += buffer.len() as u64; + + Ok(vec![StoreInfo::new_content(Store::Oplog, index, &buffer)].into_boxed_slice()) + } + + fn fresh(key_pair: PartialKeypair) -> Result { + let entries_length: u64 = 0; + let entries_byte_length: u64 = 0; + let header = Header::new(key_pair); + let (header_bits, infos_to_flush) = + Self::insert_header(&header, entries_byte_length, INITIAL_HEADER_BITS, false)?; + let oplog = Oplog { + header_bits, + entries_length, + entries_byte_length, + }; + Ok(OplogOpenOutcome::from_create_header_outcome( + oplog, + OplogCreateHeaderOutcome { + header, + infos_to_flush, + }, + )) + } + + fn insert_header( + header: &Header, + entries_byte_length: u64, + current_header_bits: [bool; 2], + clear_traces: bool, + ) -> Result<([bool; 2], Box<[StoreInfo]>), HypercoreError> { + // The first 8 bytes will be filled with `prepend_leader`. 
+ let data_start_index: usize = 8; + let mut state = HypercoreState::new_with_start_and_end(data_start_index, data_start_index); + + // Get the right slot and header bit + let (oplog_slot, header_bit) = + Oplog::get_next_header_oplog_slot_and_bit_value(&current_header_bits); + let mut new_header_bits = current_header_bits; + match oplog_slot { + OplogSlot::FirstHeader => new_header_bits[0] = header_bit, + OplogSlot::SecondHeader => new_header_bits[1] = header_bit, + _ => { + panic!("Invalid oplog slot"); + } + } + + // Preencode the new header + (*state).preencode(header)?; + + // If clearing, let's add zeros to the end + let end = if clear_traces { + let end = state.end(); + state.set_end(HEADER_SIZE); + end + } else { + state.end() + }; + + // Create a buffer for the needed data + let mut buffer = state.create_buffer(); + + // Encode the header + (*state).encode(header, &mut buffer)?; + + // Finally prepend the buffer's 8 first bytes with a CRC, len and right bits + Self::prepend_leader( + end - data_start_index, + header_bit, + false, + &mut state, + &mut buffer, + )?; + + // The oplog is always truncated to the minimum byte size, which is right after + // all of the entries in the oplog finish. + let truncate_index = OplogSlot::Entries as u64 + entries_byte_length; + Ok(( + new_header_bits, + vec![ + StoreInfo::new_content(Store::Oplog, oplog_slot as u64, &buffer), + StoreInfo::new_truncate(Store::Oplog, truncate_index), + ] + .into_boxed_slice(), + )) + } + + /// Prepends given `State` with 4 bytes of CRC followed by 4 bytes containing length of + /// following buffer, 1 bit indicating which header is relevant to the entry (or if used to + /// wrap the actual header, then the header bit relevant for saving) and 1 bit that tells if + /// the written batch is only partially finished. For this to work, the state given must have + /// 8 bytes in reserve in the beginning, so that state.start can be set back 8 bytes. 
+ fn prepend_leader( + len: usize, + header_bit: bool, + partial_bit: bool, + state: &mut HypercoreState, + buffer: &mut Box<[u8]>, + ) -> Result<(), HypercoreError> { + // The 4 bytes right before start of data is the length in 8+8+8+6=30 bits. The 31st bit is + // the partial bit and 32nd bit the header bit. + let start = (*state).start(); + (*state).set_start(start - len - 4)?; + let len_u32: u32 = len.try_into().unwrap(); + let partial_bit: u32 = if partial_bit { 2 } else { 0 }; + let header_bit: u32 = if header_bit { 1 } else { 0 }; + let combined: u32 = (len_u32 << 2) | header_bit | partial_bit; + state.encode_u32(combined, buffer)?; + + // Before that, is a 4 byte CRC32 that is a checksum of the above encoded 4 bytes and the + // content. + let start = state.start(); + state.set_start(start - 8)?; + let checksum = crc32fast::hash(&buffer[state.start() + 4..state.start() + 8 + len]); + state.encode_u32(checksum, buffer)?; + Ok(()) + } + + /// Validates that leader at given index is valid, and returns header and partial bits and + /// `State` for the header/entry that the leader was for. + fn validate_leader( + index: usize, + buffer: &[u8], + ) -> Result, HypercoreError> { + if buffer.len() < index + 8 { + return Ok(None); + } + let mut state = HypercoreState::new_with_start_and_end(index, buffer.len()); + let stored_checksum: u32 = state.decode_u32(buffer)?; + let combined: u32 = state.decode_u32(buffer)?; + let len = usize::try_from(combined >> 2) + .expect("Attempted converting to a 32 bit usize on below 32 bit system"); + + // NB: In the Javascript version IIUC zero length is caught only with a mismatch + // of checksums, which is silently interpreted to only mean "no value". That doesn't sound good: + // better to throw an error on mismatch and let the caller at least log the problem. 
+ if len == 0 || state.end() - state.start() < len { + return Ok(None); + } + let header_bit = combined & 1 == 1; + let partial_bit = combined & 2 == 2; + + let new_start = index + 8; + state.set_end(new_start + len); + state.set_start(new_start)?; + + let calculated_checksum = crc32fast::hash(&buffer[index + 4..state.end()]); + if calculated_checksum != stored_checksum { + return Err(HypercoreError::InvalidChecksum { + context: "Calculated signature does not match oplog signature".to_string(), + }); + }; + + Ok(Some(ValidateLeaderOutcome { + header_bit, + partial_bit, + state, + })) + } + + /// Gets the current header bit + fn get_current_header_bit(&self) -> bool { + self.header_bits[0] != self.header_bits[1] + } + + /// Based on given header_bits, determines if saving the header should be done to the first + /// header slot or the second header slot and the bit that it should get. + fn get_next_header_oplog_slot_and_bit_value(header_bits: &[bool; 2]) -> (OplogSlot, bool) { + // Writing a header to the disk is most efficient when only one area is saved. + // This makes it a bit less obvious to find out which of the headers is older + // and which newer. The bits indicate the header slot index in this way: + // + // [true, false] => [false, false] => [false, true] => [true, true] => [true, false] ... + // First => Second => First => Second => First + if header_bits[0] != header_bits[1] { + // First slot + (OplogSlot::FirstHeader, !header_bits[0]) + } else { + // Second slot + (OplogSlot::SecondHeader, !header_bits[1]) + } + } +} diff --git a/src/prelude.rs b/src/prelude.rs index eeede9a9..0dd26ea4 100644 --- a/src/prelude.rs +++ b/src/prelude.rs @@ -1,9 +1,5 @@ //! Convenience wrapper to import all of Hypercore's core. -//! -//! ```rust -//! use hypercore::prelude::*; -//! let feed = Feed::default(); -//! 
``` -pub use crate::feed::Feed; -// pub use feed_builder::FeedBuilder; -pub use crate::storage::{Node, NodeTrait, Storage, Store}; +pub use crate::common::{HypercoreError, Store}; +pub use crate::core::Hypercore; +pub use crate::crypto::PartialKeypair; +pub use crate::storage::Storage; diff --git a/src/proof.rs b/src/proof.rs deleted file mode 100644 index e7d4cd42..00000000 --- a/src/proof.rs +++ /dev/null @@ -1,30 +0,0 @@ -use crate::Node; -use crate::Signature; - -/// A merkle proof for an index, created by the `.proof()` method. -#[derive(Debug, PartialEq, Clone)] -pub struct Proof { - /// The index to which this proof corresponds. - pub index: u64, - /// Nodes that verify the index you passed. - pub nodes: Vec, - /// An `ed25519` signature, guaranteeing the integrity of the nodes. - pub signature: Option, -} - -impl Proof { - /// Access the `index` field from the proof. - pub fn index(&self) -> u64 { - self.index - } - - /// Access the `nodes` field from the proof. - pub fn nodes(&self) -> &[Node] { - &self.nodes - } - - /// Access the `signature` field from the proof. - pub fn signature(&self) -> Option<&Signature> { - self.signature.as_ref() - } -} diff --git a/src/replicate/message.rs b/src/replicate/message.rs deleted file mode 100644 index 8a8887e9..00000000 --- a/src/replicate/message.rs +++ /dev/null @@ -1,6 +0,0 @@ -/// A message sent over the network. -#[derive(Debug, Clone, PartialEq)] -pub struct Message { - start: u64, - length: Option, -} diff --git a/src/replicate/mod.rs b/src/replicate/mod.rs deleted file mode 100644 index d0f44e5c..00000000 --- a/src/replicate/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -mod message; -mod peer; - -pub use self::message::Message; -pub use self::peer::Peer; diff --git a/src/replicate/peer.rs b/src/replicate/peer.rs deleted file mode 100644 index 58e9357b..00000000 --- a/src/replicate/peer.rs +++ /dev/null @@ -1,40 +0,0 @@ -// use sparse_bitfield::Bitfield; - -use super::Message; - -/// A peer on the network. 
-// omitted fields: [ -// feed, -// stream, -// inflightRequests, -// ] -#[derive(Debug, Clone, PartialEq)] -pub struct Peer { - // remote_id: u64, -// remote_length: u64, -// remote_bitfield: Bitfield, -// remote_is_want: bool, -// remote_is_downloading: bool, -// is_live: bool, -// is_sparse: bool, -// is_downloading: bool, -// is_uploading: bool, -// max_requests: u16, -} - -impl Peer { - /// Check if the peer has a message. - pub fn have(&mut self, _msg: &Message) { - unimplemented!(); - } - - /// Tell a peer you no longer have a message. - pub fn unhave(&mut self, _msg: &Message) { - unimplemented!(); - } - - /// Update. - pub fn update(&mut self) { - unimplemented!(); - } -} diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 0d5fa2ba..7eb3776d 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -1,48 +1,19 @@ //! Save data to a desired storage backend. -mod node; -mod persist; - -pub use self::node::Node; -pub use self::persist::Persist; -pub use merkle_tree_stream::Node as NodeTrait; - -use anyhow::{anyhow, ensure, Result}; -use ed25519_dalek::{PublicKey, SecretKey, Signature, PUBLIC_KEY_LENGTH, SECRET_KEY_LENGTH}; -use flat_tree as flat; use futures::future::FutureExt; +#[cfg(not(target_arch = "wasm32"))] use random_access_disk::RandomAccessDisk; use random_access_memory::RandomAccessMemory; -use random_access_storage::RandomAccess; -use sleep_parser::*; -use std::borrow::Borrow; -use std::convert::TryFrom; +use random_access_storage::{RandomAccess, RandomAccessError}; use std::fmt::Debug; -use std::ops::Range; +#[cfg(not(target_arch = "wasm32"))] use std::path::PathBuf; +use tracing::instrument; -const HEADER_OFFSET: u64 = 32; - -#[derive(Debug)] -pub struct PartialKeypair { - pub public: PublicKey, - pub secret: Option, -} - -/// The types of stores that can be created. 
-#[derive(Debug)] -pub enum Store { - /// Tree - Tree, - /// Data - Data, - /// Bitfield - Bitfield, - /// Signatures - Signatures, - /// Keypair - Keypair, -} +use crate::{ + common::{Store, StoreInfo, StoreInfoInstruction, StoreInfoType}, + HypercoreError, +}; /// Save data to a desired storage backend. #[derive(Debug)] @@ -53,387 +24,251 @@ where tree: T, data: T, bitfield: T, - signatures: T, - keypair: T, + oplog: T, +} + +pub(crate) fn map_random_access_err(err: RandomAccessError) -> HypercoreError { + match err { + RandomAccessError::IO { + return_code, + context, + source, + } => HypercoreError::IO { + context: Some(format!( + "RandomAccess IO error. Context: {context:?}, return_code: {return_code:?}", + )), + source, + }, + RandomAccessError::OutOfBounds { + offset, + end, + length, + } => HypercoreError::InvalidOperation { + context: format!( + "RandomAccess out of bounds. Offset: {offset}, end: {end:?}, length: {length}", + ), + }, + } } impl Storage where - T: RandomAccess> + Debug + Send, + T: RandomAccess + Debug + Send, { - /// Create a new instance. Takes a keypair and a callback to create new - /// storage instances. - // Named `.open()` in the JS version. Replaces the `.openKey()` method too by - // requiring a key pair to be initialized before creating a new instance. - pub async fn new(create: Cb, overwrite: bool) -> Result + /// Create a new instance. Takes a callback to create new storage instances and overwrite flag. 
+ pub async fn open(create: Cb, overwrite: bool) -> Result where - Cb: Fn(Store) -> std::pin::Pin> + Send>>, + Cb: Fn( + Store, + ) -> std::pin::Pin< + Box> + Send>, + >, { - let tree = create(Store::Tree).await?; - let data = create(Store::Data).await?; - let bitfield = create(Store::Bitfield).await?; - let signatures = create(Store::Signatures).await?; - let keypair = create(Store::Keypair).await?; - let mut instance = Self { + let mut tree = create(Store::Tree).await.map_err(map_random_access_err)?; + let mut data = create(Store::Data).await.map_err(map_random_access_err)?; + let mut bitfield = create(Store::Bitfield) + .await + .map_err(map_random_access_err)?; + let mut oplog = create(Store::Oplog).await.map_err(map_random_access_err)?; + + if overwrite { + if tree.len().await.map_err(map_random_access_err)? > 0 { + tree.truncate(0).await.map_err(map_random_access_err)?; + } + if data.len().await.map_err(map_random_access_err)? > 0 { + data.truncate(0).await.map_err(map_random_access_err)?; + } + if bitfield.len().await.map_err(map_random_access_err)? > 0 { + bitfield.truncate(0).await.map_err(map_random_access_err)?; + } + if oplog.len().await.map_err(map_random_access_err)? > 0 { + oplog.truncate(0).await.map_err(map_random_access_err)?; + } + } + + let instance = Self { tree, data, bitfield, - signatures, - keypair, + oplog, }; - if overwrite || instance.bitfield.len().await.unwrap_or(0) == 0 { - let header = create_bitfield(); - instance - .bitfield - .write(0, &header.to_vec()) - .await - .map_err(|e| anyhow!(e))?; - } - - if overwrite || instance.signatures.len().await.unwrap_or(0) == 0 { - let header = create_signatures(); - instance - .signatures - .write(0, &header.to_vec()) - .await - .map_err(|e| anyhow!(e))?; - } - - if overwrite || instance.tree.len().await.unwrap_or(0) == 0 { - let header = create_tree(); - instance - .tree - .write(0, &header.to_vec()) - .await - .map_err(|e| anyhow!(e))?; - } - Ok(instance) } - /// Write data to the feed. 
- #[inline] - pub async fn write_data(&mut self, offset: u64, data: &[u8]) -> Result<()> { - self.data.write(offset, &data).await.map_err(|e| anyhow!(e)) - } - - /// Write a byte vector to a data storage (random-access instance) at the - /// position of `index`. - /// - /// NOTE: Meant to be called from the `.put()` feed method. Probably used to - /// insert data as-is after receiving it from the network (need to confirm - /// with mafintosh). - /// TODO: Ensure the signature size is correct. - /// NOTE: Should we create a `Data` entry type? - pub async fn put_data(&mut self, index: u64, data: &[u8], nodes: &[Node]) -> Result<()> { - if data.is_empty() { - return Ok(()); - } - - let range = self.data_offset(index, nodes).await?; - - ensure!( - (range.end - range.start) as usize == data.len(), - format!("length `{:?} != {:?}`", range.count(), data.len()) - ); - - self.data - .write(range.start, data) - .await - .map_err(|e| anyhow!(e)) - } - - /// Get data from disk that the user has written to it. This is stored - /// unencrypted, so there's no decryption needed. - // FIXME: data_offset always reads out index 0, length 0 - #[inline] - pub async fn get_data(&mut self, index: u64) -> Result> { - let cached_nodes = Vec::new(); // TODO: reuse allocation. - let range = self.data_offset(index, &cached_nodes).await?; - self.data - .read(range.start, range.count() as u64) - .await - .map_err(|e| anyhow!(e)) - } - - /// Search the signature stores for a `Signature`, starting at `index`. - pub fn next_signature( + /// Read info from store based on given instruction. Convenience method to `read_infos`. + pub(crate) async fn read_info( &mut self, - index: u64, - ) -> futures::future::BoxFuture<'_, Result> { - let bytes = async_std::task::block_on(async { - self.signatures - .read(HEADER_OFFSET + 64 * index, 64) - .await - .map_err(|e| anyhow!(e)) - }); - async move { - let bytes = bytes?; - if not_zeroes(&bytes) { - Ok(Signature::try_from(&bytes[..])?) 
- } else { - Ok(self.next_signature(index + 1).await?) - } - } - .boxed() + info_instruction: StoreInfoInstruction, + ) -> Result { + let mut infos = self.read_infos_to_vec(&[info_instruction]).await?; + Ok(infos + .pop() + .expect("Should have gotten one info with one instruction")) } - /// Get a `Signature` from the store. - #[inline] - pub async fn get_signature(&mut self, index: u64) -> Result { - let bytes = self - .signatures - .read(HEADER_OFFSET + 64 * index, 64) - .await - .map_err(|e| anyhow!(e))?; - ensure!(not_zeroes(&bytes), "No signature found"); - Ok(Signature::try_from(&bytes[..])?) - } - - /// Write a `Signature` to `self.Signatures`. - /// TODO: Ensure the signature size is correct. - /// NOTE: Should we create a `Signature` entry type? - #[inline] - pub async fn put_signature( + /// Read infos from stores based on given instructions + pub(crate) async fn read_infos( &mut self, - index: u64, - signature: impl Borrow, - ) -> Result<()> { - let signature = signature.borrow(); - self.signatures - .write(HEADER_OFFSET + 64 * index, &signature.to_bytes()) - .await - .map_err(|e| anyhow!(e)) + info_instructions: &[StoreInfoInstruction], + ) -> Result, HypercoreError> { + let infos = self.read_infos_to_vec(info_instructions).await?; + Ok(infos.into_boxed_slice()) } - /// TODO(yw) docs - /// Get the offset for the data, return `(offset, size)`. - /// - /// ## Panics - /// A panic can occur if no maximum value is found. 
- pub async fn data_offset(&mut self, index: u64, cached_nodes: &[Node]) -> Result> { - let mut roots = Vec::new(); // TODO: reuse alloc - flat::full_roots(tree_index(index), &mut roots); - - let mut offset = 0; - let mut pending = roots.len() as u64; - let block_index = tree_index(index); - - if pending == 0 { - let len = match find_node(&cached_nodes, block_index) { - Some(node) => node.len(), - None => (self.get_node(block_index).await?).len(), - }; - return Ok(offset..offset + len); + /// Reads infos but retains them as a Vec + pub(crate) async fn read_infos_to_vec( + &mut self, + info_instructions: &[StoreInfoInstruction], + ) -> Result, HypercoreError> { + if info_instructions.is_empty() { + return Ok(vec![]); } - - for root in roots { - // FIXME: we're always having a cache miss here. Check cache first before - // getting a node from the backend. - // - // ```rust - // let node = match find_node(cached_nodes, root) { - // Some(node) => node, - // None => self.get_node(root), - // }; - // ``` - let node = self.get_node(root).await?; - - offset += node.len(); - pending -= 1; - if pending > 0 { - continue; + let mut current_store: Store = info_instructions[0].store.clone(); + let mut storage = self.get_random_access(¤t_store); + let mut infos: Vec = Vec::with_capacity(info_instructions.len()); + for instruction in info_instructions.iter() { + if instruction.store != current_store { + current_store = instruction.store.clone(); + storage = self.get_random_access(¤t_store); + } + match instruction.info_type { + StoreInfoType::Content => { + let read_length = match instruction.length { + Some(length) => length, + None => storage.len().await.map_err(map_random_access_err)?, + }; + let read_result = storage.read(instruction.index, read_length).await; + let info: StoreInfo = match read_result { + Ok(buf) => Ok(StoreInfo::new_content( + instruction.store.clone(), + instruction.index, + &buf, + )), + Err(RandomAccessError::OutOfBounds { + offset: _, + end: _, + length, 
+ }) => { + if instruction.allow_miss { + Ok(StoreInfo::new_content_miss( + instruction.store.clone(), + instruction.index, + )) + } else { + Err(HypercoreError::InvalidOperation { + context: format!( + "Could not read from store {}, index {} / length {} is out of bounds for store length {}", + instruction.index, + read_length, + current_store, + length + ), + }) + } + } + Err(e) => Err(map_random_access_err(e)), + }?; + infos.push(info); + } + StoreInfoType::Size => { + let length = storage.len().await.map_err(map_random_access_err)?; + infos.push(StoreInfo::new_size( + instruction.store.clone(), + instruction.index, + length - instruction.index, + )); + } } - - let len = match find_node(&cached_nodes, block_index) { - Some(node) => node.len(), - None => (self.get_node(block_index).await?).len(), - }; - - return Ok(offset..offset + len); } - - unreachable!(); + Ok(infos) } - /// Get a `Node` from the `tree` storage. - #[inline] - pub async fn get_node(&mut self, index: u64) -> Result { - let buf = self - .tree - .read(HEADER_OFFSET + 40 * index, 40) - .await - .map_err(|e| anyhow!(e))?; - let node = Node::from_bytes(index, &buf)?; - Ok(node) + /// Flush info to storage. Convenience method to `flush_infos`. + pub(crate) async fn flush_info(&mut self, slice: StoreInfo) -> Result<(), HypercoreError> { + self.flush_infos(&[slice]).await } - /// Write a `Node` to the `tree` storage. - /// TODO: prevent extra allocs here. Implement a method on node that can reuse - /// a buffer. - #[inline] - pub async fn put_node(&mut self, node: &Node) -> Result<()> { - let index = node.index(); - let buf = node.to_bytes()?; - self.tree - .write(HEADER_OFFSET + 40 * index, &buf) - .await - .map_err(|e| anyhow!(e)) - } - - /// Write data to the internal bitfield module. - /// TODO: Ensure the chunk size is correct. - /// NOTE: Should we create a bitfield entry type? 
- #[inline] - pub async fn put_bitfield(&mut self, offset: u64, data: &[u8]) -> Result<()> { - self.bitfield - .write(HEADER_OFFSET + offset, data) - .await - .map_err(|e| anyhow!(e)) - } - - /// Read bitfield header. - pub async fn read_bitfield(&mut self) -> Result> { - let buf = self - .bitfield - .read(0, 32) - .await - .map_err(|_| anyhow::anyhow!("read bitfield header"))?; - let header = Header::from_vec(&buf).map_err(|e| anyhow::anyhow!(e))?; - - // khodzha: - // TODO: we should handle eof vs errors here somehow but idk how to do that - let mut buf: Vec = Vec::new(); - let mut idx: u64 = 0; - let ent_size: u64 = header.entry_size as u64; - loop { - let result = self - .bitfield - .read(HEADER_OFFSET + idx * ent_size, ent_size) - .await; - if let Ok(slice) = result { - buf.extend_from_slice(&slice); - idx += 1; - } else { - return Ok(buf); + /// Flush infos to storage + pub(crate) async fn flush_infos(&mut self, infos: &[StoreInfo]) -> Result<(), HypercoreError> { + if infos.is_empty() { + return Ok(()); + } + let mut current_store: Store = infos[0].store.clone(); + let mut storage = self.get_random_access(¤t_store); + for info in infos.iter() { + if info.store != current_store { + current_store = info.store.clone(); + storage = self.get_random_access(¤t_store); + } + match info.info_type { + StoreInfoType::Content => { + if !info.miss { + if let Some(data) = &info.data { + storage + .write(info.index, data) + .await + .map_err(map_random_access_err)?; + } + } else { + storage + .del( + info.index, + info.length.expect("When deleting, length must be given"), + ) + .await + .map_err(map_random_access_err)?; + } + } + StoreInfoType::Size => { + if info.miss { + storage + .truncate(info.index) + .await + .map_err(map_random_access_err)?; + } else { + panic!("Flushing a size that isn't miss, is not supported"); + } + } } } + Ok(()) } - /// Read a public key from storage - pub async fn read_public_key(&mut self) -> Result { - let buf = self - .keypair - .read(0, 
PUBLIC_KEY_LENGTH as u64) - .await - .map_err(|e| anyhow!(e))?; - let public_key = PublicKey::from_bytes(&buf)?; - Ok(public_key) - } - - /// Read a secret key from storage - pub async fn read_secret_key(&mut self) -> Result { - let buf = self - .keypair - .read(PUBLIC_KEY_LENGTH as u64, SECRET_KEY_LENGTH as u64) - .await - .map_err(|e| anyhow!(e))?; - let secret_key = SecretKey::from_bytes(&buf)?; - Ok(secret_key) - } - - /// Write a public key to the storage - pub async fn write_public_key(&mut self, public_key: &PublicKey) -> Result<()> { - let buf: [u8; PUBLIC_KEY_LENGTH] = public_key.to_bytes(); - self.keypair.write(0, &buf).await.map_err(|e| anyhow!(e)) - } - - /// Write a secret key to the storage - pub async fn write_secret_key(&mut self, secret_key: &SecretKey) -> Result<()> { - let buf: [u8; SECRET_KEY_LENGTH] = secret_key.to_bytes(); - self.keypair - .write(PUBLIC_KEY_LENGTH as u64, &buf) - .await - .map_err(|e| anyhow!(e)) - } - - /// Tries to read a partial keypair (ie: with an optional secret_key) from the storage - pub async fn read_partial_keypair(&mut self) -> Option { - match self.read_public_key().await { - Ok(public) => match self.read_secret_key().await { - Ok(secret) => Some(PartialKeypair { - public, - secret: Some(secret), - }), - Err(_) => Some(PartialKeypair { - public, - secret: None, - }), - }, - Err(_) => None, + fn get_random_access(&mut self, store: &Store) -> &mut T { + match store { + Store::Tree => &mut self.tree, + Store::Data => &mut self.data, + Store::Bitfield => &mut self.bitfield, + Store::Oplog => &mut self.oplog, } } } impl Storage { - /// Create a new instance backed by a `RandomAccessMemory` instance. - pub async fn new_memory() -> Result { + /// New storage backed by a `RandomAccessMemory` instance. + #[instrument(err)] + pub async fn new_memory() -> Result { let create = |_| async { Ok(RandomAccessMemory::default()) }.boxed(); - Ok(Self::new(create, true).await?) 
+ // No reason to overwrite, as this is a new memory segment + Self::open(create, false).await } } +#[cfg(not(target_arch = "wasm32"))] impl Storage { - /// Create a new instance backed by a `RandomAccessDisk` instance. - pub async fn new_disk(dir: &PathBuf, overwrite: bool) -> Result { - let storage = |storage: Store| { - let name = match storage { + /// New storage backed by a `RandomAccessDisk` instance. + #[instrument(err)] + pub async fn new_disk(dir: &PathBuf, overwrite: bool) -> Result { + let storage = |store: Store| { + let name = match store { Store::Tree => "tree", Store::Data => "data", Store::Bitfield => "bitfield", - Store::Signatures => "signatures", - Store::Keypair => "key", + Store::Oplog => "oplog", }; RandomAccessDisk::open(dir.as_path().join(name)).boxed() }; - Ok(Self::new(storage, overwrite).await?) - } -} - -/// Get a node from a vector of nodes. -#[inline] -fn find_node(nodes: &[Node], index: u64) -> Option<&Node> { - for node in nodes { - if node.index() == index { - return Some(node); - } - } - None -} - -/// Check if a byte slice is not completely zero-filled. -#[inline] -fn not_zeroes(bytes: &[u8]) -> bool { - for byte in bytes { - if *byte != 0 { - return true; - } + Self::open(storage, overwrite).await } - false -} - -/// Convert the index to the index in the tree. -#[inline] -fn tree_index(index: u64) -> u64 { - 2 * index -} - -#[test] -fn should_detect_zeroes() { - let nums = vec![0; 10]; - assert!(!not_zeroes(&nums)); - - let nums = vec![1; 10]; - assert!(not_zeroes(&nums)); } diff --git a/src/storage/persist.rs b/src/storage/persist.rs deleted file mode 100644 index 70a3ec0a..00000000 --- a/src/storage/persist.rs +++ /dev/null @@ -1,19 +0,0 @@ -use super::Storage; -use anyhow::Result; -use random_access_storage::RandomAccess; -use std::fmt::Debug; - -/// Persist data to a `Storage` instance. -pub trait Persist -where - T: RandomAccess + Debug, -{ - /// Create an instance from a byte vector. 
- fn from_bytes(index: u64, buf: &[u8]) -> Self; - - /// Create a vector. - fn to_vec(&self) -> Result>; - - /// Persist into a storage backend. - fn store(&self, index: u64, store: Storage) -> Result<()>; -} diff --git a/src/tree/merkle_tree.rs b/src/tree/merkle_tree.rs new file mode 100644 index 00000000..c9579199 --- /dev/null +++ b/src/tree/merkle_tree.rs @@ -0,0 +1,1616 @@ +use compact_encoding::State; +use ed25519_dalek::Signature; +use futures::future::Either; +use intmap::IntMap; +#[cfg(feature = "cache")] +use moka::sync::Cache; +use std::convert::TryFrom; + +#[cfg(feature = "cache")] +use crate::common::cache::CacheOptions; +use crate::common::{HypercoreError, NodeByteRange, Proof, ValuelessProof}; +use crate::crypto::Hash; +use crate::oplog::HeaderTree; +use crate::{ + common::{StoreInfo, StoreInfoInstruction}, + Node, VerifyingKey, +}; +use crate::{ + DataBlock, DataHash, DataSeek, DataUpgrade, RequestBlock, RequestSeek, RequestUpgrade, Store, +}; + +use super::MerkleTreeChangeset; + +/// Merkle tree. +/// See https://github.com/hypercore-protocol/hypercore/blob/master/lib/merkle-tree.js +#[derive(Debug)] +pub(crate) struct MerkleTree { + pub(crate) roots: Vec, + pub(crate) length: u64, + pub(crate) byte_length: u64, + pub(crate) fork: u64, + pub(crate) signature: Option, + unflushed: IntMap, + truncated: bool, + truncate_to: u64, + #[cfg(feature = "cache")] + node_cache: Option>, +} + +const NODE_SIZE: u64 = 40; + +impl MerkleTree { + /// Opens MerkleTree, based on read infos. 
+ pub(crate) fn open( + header_tree: &HeaderTree, + infos: Option<&[StoreInfo]>, + #[cfg(feature = "cache")] node_cache_options: &Option, + ) -> Result, Self>, HypercoreError> { + match infos { + None => { + let root_indices = get_root_indices(&header_tree.length); + + Ok(Either::Left( + root_indices + .iter() + .map(|&index| { + StoreInfoInstruction::new_content( + Store::Tree, + NODE_SIZE * index, + NODE_SIZE, + ) + }) + .collect::>() + .into_boxed_slice(), + )) + } + Some(infos) => { + let root_indices = get_root_indices(&header_tree.length); + + let mut roots: Vec = Vec::with_capacity(infos.len()); + let mut byte_length: u64 = 0; + let mut length: u64 = 0; + + for i in 0..root_indices.len() { + let index = root_indices[i]; + if index != index_from_info(&infos[i]) { + return Err(HypercoreError::CorruptStorage { + store: Store::Tree, + context: Some( + "Given slices vector not in the correct order".to_string(), + ), + }); + } + let data = infos[i].data.as_ref().unwrap(); + let node = node_from_bytes(&index, data)?; + byte_length += node.length; + // This is totalSpan in Javascript + length += 2 * ((node.index - length) + 1); + + roots.push(node); + } + if length > 0 { + length /= 2; + } + let signature: Option = if header_tree.signature.len() > 0 { + Some( + Signature::try_from(&*header_tree.signature).map_err(|_err| { + HypercoreError::InvalidSignature { + context: "Could not parse signature".to_string(), + } + })?, + ) + } else { + None + }; + + Ok(Either::Right(Self { + #[cfg(feature = "cache")] + node_cache: node_cache_options + .as_ref() + .map(|opts| opts.to_node_cache(roots.clone())), + roots, + length, + byte_length, + fork: header_tree.fork, + unflushed: IntMap::new(), + truncated: false, + truncate_to: 0, + signature, + })) + } + } + } + + /// Initialize a changeset for this tree. 
+ /// This is called batch() in Javascript, see: + /// https://github.com/hypercore-protocol/hypercore/blob/master/lib/merkle-tree.js + pub(crate) fn changeset(&self) -> MerkleTreeChangeset { + MerkleTreeChangeset::new(self.length, self.byte_length, self.fork, self.roots.clone()) + } + + /// Commit a created changeset to the tree. + pub(crate) fn commit(&mut self, changeset: MerkleTreeChangeset) -> Result<(), HypercoreError> { + if !self.commitable(&changeset) { + return Err(HypercoreError::InvalidOperation { + context: "Tree was modified during changeset, refusing to commit".to_string(), + }); + } + + if changeset.upgraded { + self.commit_truncation(&changeset); + + self.roots = changeset.roots; + self.length = changeset.length; + self.byte_length = changeset.byte_length; + self.fork = changeset.fork; + self.signature = changeset.signature; + } + + for node in changeset.nodes { + self.unflushed.insert(node.index, node); + } + + Ok(()) + } + + /// Flush committed made changes to the tree + pub(crate) fn flush(&mut self) -> Box<[StoreInfo]> { + let mut infos_to_flush: Vec = Vec::new(); + if self.truncated { + infos_to_flush.extend(self.flush_truncation()); + } + infos_to_flush.extend(self.flush_nodes()); + infos_to_flush.into_boxed_slice() + } + + /// Get storage byte range of given hypercore index + pub(crate) fn byte_range( + &mut self, + hypercore_index: u64, + infos: Option<&[StoreInfo]>, + ) -> Result, NodeByteRange>, HypercoreError> { + let index = self.validate_hypercore_index(hypercore_index)?; + // Get nodes out of incoming infos + let nodes: IntMap> = self.infos_to_nodes(infos)?; + + // Start with getting the requested node, which will get the length + // of the byte range + let length_result = self.required_node(index, &nodes)?; + + // As for the offset, that might require fetching a lot more nodes whose + // lengths to sum + let offset_result = self.byte_offset_from_nodes(index, &nodes)?; + + // Construct response of either instructions (Left) or the 
result (Right) + let mut instructions: Vec = Vec::new(); + let mut byte_range = NodeByteRange { + index: 0, + length: 0, + }; + match length_result { + Either::Left(instruction) => { + instructions.push(instruction); + } + Either::Right(node) => { + byte_range.length = node.length; + } + } + match offset_result { + Either::Left(offset_instructions) => { + instructions.extend(offset_instructions); + } + Either::Right(offset) => { + byte_range.index = offset; + } + } + + if instructions.is_empty() { + Ok(Either::Right(byte_range)) + } else { + Ok(Either::Left(instructions.into_boxed_slice())) + } + } + + /// Get the byte offset given hypercore index + pub(crate) fn byte_offset( + &mut self, + hypercore_index: u64, + infos: Option<&[StoreInfo]>, + ) -> Result, u64>, HypercoreError> { + let index = self.validate_hypercore_index(hypercore_index)?; + self.byte_offset_from_index(index, infos) + } + + /// Get the byte offset of hypercore index in a changeset + pub(crate) fn byte_offset_in_changeset( + &mut self, + hypercore_index: u64, + changeset: &MerkleTreeChangeset, + infos: Option<&[StoreInfo]>, + ) -> Result, u64>, HypercoreError> { + if self.length == hypercore_index { + return Ok(Either::Right(self.byte_length)); + } + let index = hypercore_index_into_merkle_tree_index(hypercore_index); + let mut iter = flat_tree::Iterator::new(index); + let mut tree_offset = 0; + let mut is_right = false; + let mut parent: Option = None; + for node in &changeset.nodes { + if node.index == iter.index() { + if is_right { + if let Some(parent) = parent { + tree_offset += node.length - parent.length; + } + } + parent = Some(node.clone()); + is_right = iter.is_right(); + iter.parent(); + } + } + + let search_index = if let Some(parent) = parent { + let r = changeset + .roots + .iter() + .position(|root| root.index == parent.index); + if let Some(r) = r { + for i in 0..r { + tree_offset += self.roots[i].length; + } + return Ok(Either::Right(tree_offset)); + } + parent.index + } else { + 
index + }; + + match self.byte_offset_from_index(search_index, infos)? { + Either::Left(instructions) => Ok(Either::Left(instructions)), + Either::Right(offset) => Ok(Either::Right(offset + tree_offset)), + } + } + + pub(crate) fn add_node(&mut self, node: Node) { + self.unflushed.insert(node.index, node); + } + + pub(crate) fn truncate( + &mut self, + length: u64, + fork: u64, + infos: Option<&[StoreInfo]>, + ) -> Result, MerkleTreeChangeset>, HypercoreError> { + let head = length * 2; + let mut full_roots = vec![]; + flat_tree::full_roots(head, &mut full_roots); + let nodes: IntMap> = self.infos_to_nodes(infos)?; + let mut changeset = self.changeset(); + + let mut instructions: Vec = Vec::new(); + for (i, root) in full_roots.iter().enumerate() { + if i < changeset.roots.len() && changeset.roots[i].index == *root { + continue; + } + while changeset.roots.len() > i { + changeset.roots.pop(); + } + + let node_or_instruction = self.required_node(*root, &nodes)?; + match node_or_instruction { + Either::Left(instruction) => { + instructions.push(instruction); + } + Either::Right(node) => { + changeset.roots.push(node); + } + } + } + + if instructions.is_empty() { + while changeset.roots.len() > full_roots.len() { + changeset.roots.pop(); + } + changeset.fork = fork; + changeset.length = length; + changeset.ancestors = length; + changeset.byte_length = changeset + .roots + .iter() + .fold(0, |acc, node| acc + node.length); + changeset.upgraded = true; + Ok(Either::Right(changeset)) + } else { + Ok(Either::Left(instructions.into_boxed_slice())) + } + } + + /// Creates valueless proof from requests. + /// TODO: This is now just a clone of javascript's + /// https://github.com/holepunchto/hypercore/blob/9ce03363cb8938dbab53baba7d7cc9dde0508a7e/lib/merkle-tree.js#L1181 + /// The implementation should be rewritten to make it clearer. 
+ pub(crate) fn create_valueless_proof( + &mut self, + block: Option<&RequestBlock>, + hash: Option<&RequestBlock>, + seek: Option<&RequestSeek>, + upgrade: Option<&RequestUpgrade>, + infos: Option<&[StoreInfo]>, + ) -> Result, ValuelessProof>, HypercoreError> { + let nodes: IntMap> = self.infos_to_nodes(infos)?; + let mut instructions: Vec = Vec::new(); + let fork = self.fork; + let signature = self.signature; + let head = 2 * self.length; + let (from, to) = if let Some(upgrade) = upgrade.as_ref() { + let from = upgrade.start * 2; + (from, from + upgrade.length * 2) + } else { + (0, head) + }; + let indexed = normalize_indexed(block, hash); + + if from >= to || to > head { + return Err(HypercoreError::InvalidOperation { + context: "Invalid upgrade".to_string(), + }); + } + + let mut sub_tree = head; + let mut p = LocalProof { + seek: None, + nodes: None, + upgrade: None, + additional_upgrade: None, + }; + let mut untrusted_sub_tree = false; + if let Some(indexed) = indexed.as_ref() { + if seek.is_some() && upgrade.is_some() && indexed.index >= from { + return Err(HypercoreError::InvalidOperation { + context: "Cannot both do a seek and block/hash request when upgrading" + .to_string(), + }); + } + + if let Some(upgrade) = upgrade.as_ref() { + untrusted_sub_tree = indexed.last_index < upgrade.start; + } else { + untrusted_sub_tree = true; + } + + if untrusted_sub_tree { + sub_tree = nodes_to_root(indexed.index, indexed.nodes, to)?; + let seek_root = if let Some(seek) = seek.as_ref() { + let index_or_instructions = + self.seek_untrusted_tree(sub_tree, seek.bytes, &nodes)?; + match index_or_instructions { + Either::Left(new_instructions) => { + instructions.extend(new_instructions); + return Ok(Either::Left(instructions.into_boxed_slice())); + } + Either::Right(index) => index, + } + } else { + head + }; + if let Either::Left(new_instructions) = self.block_and_seek_proof( + Some(indexed), + seek.is_some(), + seek_root, + sub_tree, + &mut p, + &nodes, + )? 
{ + instructions.extend(new_instructions); + } + } else if upgrade.is_some() { + sub_tree = indexed.index; + } + } + if !untrusted_sub_tree { + if let Some(seek) = seek.as_ref() { + let index_or_instructions = self.seek_from_head(to, seek.bytes, &nodes)?; + sub_tree = match index_or_instructions { + Either::Left(new_instructions) => { + instructions.extend(new_instructions); + return Ok(Either::Left(instructions.into_boxed_slice())); + } + Either::Right(index) => index, + }; + } + } + + if upgrade.is_some() { + if let Either::Left(new_instructions) = self.upgrade_proof( + indexed.as_ref(), + seek.is_some(), + from, + to, + sub_tree, + &mut p, + &nodes, + )? { + instructions.extend(new_instructions); + } + + if head > to { + if let Either::Left(new_instructions) = + self.additional_upgrade_proof(to, head, &mut p, &nodes)? + { + instructions.extend(new_instructions); + } + } + } + + if instructions.is_empty() { + let (data_block, data_hash): (Option, Option) = + if let Some(block) = block.as_ref() { + ( + Some(DataHash { + index: block.index, + nodes: p.nodes.expect("nodes need to be present"), + }), + None, + ) + } else if let Some(hash) = hash.as_ref() { + ( + None, + Some(DataHash { + index: hash.index, + nodes: p.nodes.expect("nodes need to be set"), + }), + ) + } else { + (None, None) + }; + + let data_seek: Option = if let Some(seek) = seek.as_ref() { + p.seek.map(|p_seek| DataSeek { + bytes: seek.bytes, + nodes: p_seek, + }) + } else { + None + }; + + let data_upgrade: Option = if let Some(upgrade) = upgrade.as_ref() { + Some(DataUpgrade { + start: upgrade.start, + length: upgrade.length, + nodes: p.upgrade.expect("nodes need to be set"), + additional_nodes: if let Some(additional_upgrade) = p.additional_upgrade { + additional_upgrade + } else { + vec![] + }, + signature: signature + .expect("signature needs to be set") + .to_bytes() + .to_vec(), + }) + } else { + None + }; + + Ok(Either::Right(ValuelessProof { + fork, + block: data_block, + hash: data_hash, + 
seek: data_seek, + upgrade: data_upgrade, + })) + } else { + Ok(Either::Left(instructions.into_boxed_slice())) + } + } + + /// Verifies a proof received from a peer. + pub(crate) fn verify_proof( + &mut self, + proof: &Proof, + public_key: &VerifyingKey, + infos: Option<&[StoreInfo]>, + ) -> Result, MerkleTreeChangeset>, HypercoreError> { + let nodes: IntMap> = self.infos_to_nodes(infos)?; + let mut instructions: Vec = Vec::new(); + let mut changeset = self.changeset(); + + let mut unverified_block_root_node = verify_tree( + proof.block.as_ref(), + proof.hash.as_ref(), + proof.seek.as_ref(), + &mut changeset, + )?; + if let Some(upgrade) = proof.upgrade.as_ref() { + if verify_upgrade( + proof.fork, + upgrade, + unverified_block_root_node.as_ref(), + public_key, + &mut changeset, + )? { + unverified_block_root_node = None; + } + } + + if let Some(unverified_block_root_node) = unverified_block_root_node { + let node_or_instruction = + self.required_node(unverified_block_root_node.index, &nodes)?; + match node_or_instruction { + Either::Left(instruction) => { + instructions.push(instruction); + } + Either::Right(verified_block_root_node) => { + if verified_block_root_node.hash != unverified_block_root_node.hash { + return Err(HypercoreError::InvalidChecksum { + context: format!( + "Invalid checksum at node {}, store {}", + unverified_block_root_node.index, + Store::Tree + ), + }); + } + } + } + } + + if instructions.is_empty() { + Ok(Either::Right(changeset)) + } else { + Ok(Either::Left(instructions.into_boxed_slice())) + } + } + + /// Attempts to get missing nodes from given index. NB: must be called in a loop. 
pub(crate) fn missing_nodes(
    &mut self,
    index: u64,
    infos: Option<&[StoreInfo]>,
) -> Result<Either<Box<[StoreInfoInstruction]>, u64>, HypercoreError> {
    // Walks from `index` towards the root and counts nodes that are absent,
    // stopping at the first present node. Returns a read instruction as soon
    // as a node's presence cannot be decided from `infos`.
    let head = 2 * self.length;
    let mut iter = flat_tree::Iterator::new(index);
    let iter_right_span = iter.index() + iter.factor() / 2 - 1;

    // If the index is not in the current tree, we do not know how many missing nodes there are...
    if iter_right_span >= head {
        return Ok(Either::Right(0));
    }

    let nodes: IntMap<Option<Node>> = self.infos_to_nodes(infos)?;
    let mut count: u64 = 0;
    while !iter.contains(head) {
        match self.optional_node(iter.index(), &nodes)? {
            Either::Left(instruction) => {
                return Ok(Either::Left(vec![instruction].into_boxed_slice()));
            }
            Either::Right(Some(_)) => break,
            Either::Right(None) => {
                count += 1;
                iter.parent();
            }
        }
    }
    Ok(Either::Right(count))
}

/// Is the changeset commitable to given tree
///
/// The fork must match, and the tree length the changeset was created from
/// must equal the current length for an upgrading changeset, or be no larger
/// than it otherwise.
pub(crate) fn commitable(&self, changeset: &MerkleTreeChangeset) -> bool {
    let correct_length = if changeset.upgraded {
        changeset.original_tree_length == self.length
    } else {
        changeset.original_tree_length <= self.length
    };
    changeset.original_tree_fork == self.fork && correct_length
}

/// Applies the truncating part of `changeset` to the in-memory state.
///
/// Does nothing unless the changeset shortens the tree it was created from.
fn commit_truncation(&mut self, changeset: &MerkleTreeChangeset) {
    if changeset.ancestors >= changeset.original_tree_length {
        return;
    }

    if changeset.ancestors > 0 {
        // Blank out the ancestors of the last kept leaf that span past the
        // new head.
        let head = 2 * changeset.ancestors;
        let mut iter = flat_tree::Iterator::new(head - 2);
        loop {
            let index = iter.index();
            if iter.contains(head) && index < head {
                self.unflushed.insert(index, Node::new_blank(index));
            }

            if iter.offset() == 0 {
                break;
            }
            iter.parent();
        }
    }

    self.truncate_to = if self.truncated {
        std::cmp::min(self.truncate_to, changeset.ancestors)
    } else {
        changeset.ancestors
    };
    self.truncated = true;

    // Drop unflushed nodes at or beyond the new head. The indices are
    // collected first because the map cannot be mutated while iterating.
    let cutoff = 2 * changeset.ancestors;
    let stale_indices: Vec<u64> = self
        .unflushed
        .iter()
        .map(|(index, _)| *index)
        .filter(|index| *index >= cutoff)
        .collect();
    for index in stale_indices {
        self.unflushed.remove(index);
    }
}

/// Resets the pending truncation and returns the store truncation to apply.
pub(crate) fn flush_truncation(&mut self) -> Vec<StoreInfo> {
    let offset = if self.truncate_to == 0 {
        0
    } else {
        (self.truncate_to - 1) * 80 + 40
    };
    self.truncate_to = 0;
    self.truncated = false;
    vec![StoreInfo::new_truncate(Store::Tree, offset)]
}

/// Drains the unflushed nodes into store writes.
///
/// Each node is encoded as its `u64` length followed by its 32-byte hash and
/// written at byte offset `index * 40`.
pub(crate) fn flush_nodes(&mut self) -> Vec<StoreInfo> {
    let mut infos_to_flush: Vec<StoreInfo> = Vec::with_capacity(self.unflushed.len());
    for (_, node) in self.unflushed.drain() {
        let (mut state, mut buffer) = State::new_with_size(40);
        state
            .encode_u64(node.length, &mut buffer)
            .expect("Encoding u64 should not fail");
        state
            .encode_fixed_32(&node.hash, &mut buffer)
            .expect("Encoding fixed 32 bytes should not fail");
        infos_to_flush.push(StoreInfo::new_content(
            Store::Tree,
            node.index * 40,
            &buffer,
        ));
    }
    infos_to_flush
}

/// Validates given hypercore index and returns tree index
///
/// # Errors
///
/// Returns `HypercoreError::BadArgument` when the index lies at or beyond
/// the current head of the tree.
fn validate_hypercore_index(&self, hypercore_index: u64) -> Result<u64, HypercoreError> {
    // Converts a hypercore index into a merkle tree index
    let index = hypercore_index_into_merkle_tree_index(hypercore_index);

    // Check bounds
    let head = 2 * self.length;
    let compare_index = if index & 1 == 0 {
        index
    } else {
        flat_tree::right_span(index)
    };
    if compare_index >= head {
        return Err(HypercoreError::BadArgument {
            context: format!("Hypercore index {hypercore_index} is out of bounds"),
        });
    }
    Ok(index)
}

/// Computes the byte offset of tree `index`, resolving nodes from `infos`.
///
/// Returns `Either::Left` with the storage reads still needed, or
/// `Either::Right` with the offset.
fn byte_offset_from_index(
    &mut self,
    index: u64,
    infos: Option<&[StoreInfo]>,
) -> Result<Either<Box<[StoreInfoInstruction]>, u64>, HypercoreError> {
    // Get nodes out of incoming infos
    let nodes: IntMap<Option<Node>> = self.infos_to_nodes(infos)?;
    // Get offset, boxing the instructions when nodes are still missing
    match self.byte_offset_from_nodes(index, &nodes)? {
        Either::Left(offset_instructions) => {
            Ok(Either::Left(offset_instructions.into_boxed_slice()))
        }
        Either::Right(offset) => Ok(Either::Right(offset)),
    }
}

/// Computes the byte offset of tree `index` from already loaded `nodes`.
///
/// # Errors
///
/// Returns `HypercoreError::BadArgument` when no root covers `index`.
fn byte_offset_from_nodes(
    &self,
    index: u64,
    nodes: &IntMap<Option<Node>>,
) -> Result<Either<Vec<StoreInfoInstruction>, u64>, HypercoreError> {
    // A parent node starts where its left-most leaf starts.
    let index = if (index & 1) == 1 {
        flat_tree::left_span(index)
    } else {
        index
    };
    let mut head: u64 = 0;
    let mut offset: u64 = 0;

    for root_node in &self.roots {
        head += 2 * ((root_node.index - head) + 1);

        if index >= head {
            // The whole root lies before the target.
            offset += root_node.length;
            continue;
        }
        let mut iter = flat_tree::Iterator::new(root_node.index);

        let mut instructions: Vec<StoreInfoInstruction> = Vec::new();
        while iter.index() != index {
            if index < iter.index() {
                iter.left_child();
            } else {
                // Target is in the right subtree: add the left sibling's length.
                let left_child = iter.left_child();
                match self.required_node(left_child, nodes)? {
                    Either::Left(instruction) => instructions.push(instruction),
                    Either::Right(node) => offset += node.length,
                }
                iter.sibling();
            }
        }
        return if instructions.is_empty() {
            Ok(Either::Right(offset))
        } else {
            Ok(Either::Left(instructions))
        };
    }

    Err(HypercoreError::BadArgument {
        context: format!("Could not calculate byte offset for index {index}"),
    })
}

/// Looks up a node that must exist.
///
/// Returns a read instruction when the node is not loaded yet.
///
/// # Errors
///
/// Returns `HypercoreError::InvalidOperation` when the node is known to be
/// absent.
fn required_node(
    &self,
    index: u64,
    nodes: &IntMap<Option<Node>>,
) -> Result<Either<StoreInfoInstruction, Node>, HypercoreError> {
    match self.node(index, nodes, false)? {
        Either::Left(instruction) => Ok(Either::Left(instruction)),
        Either::Right(Some(node)) => Ok(Either::Right(node)),
        Either::Right(None) => Err(HypercoreError::InvalidOperation {
            context: format!("Node at {} is required, store {}", index, Store::Tree),
        }),
    }
}

/// Looks up a node that may be absent; absence yields `Either::Right(None)`.
fn optional_node(
    &self,
    index: u64,
    nodes: &IntMap<Option<Node>>,
) -> Result<Either<StoreInfoInstruction, Option<Node>>, HypercoreError> {
    self.node(index, nodes, true)
}

/// Resolves a node from the cache, the unflushed nodes or the incoming
/// `nodes`, in that order, and falls back to a read instruction.
///
/// With `allow_miss` set, blank or missing nodes resolve to
/// `Either::Right(None)`; otherwise they are an error.
fn node(
    &self,
    index: u64,
    nodes: &IntMap<Option<Node>>,
    allow_miss: bool,
) -> Result<Either<StoreInfoInstruction, Option<Node>>, HypercoreError> {
    // First check the cache
    #[cfg(feature = "cache")]
    if let Some(node_cache) = &self.node_cache {
        if let Some(node) = node_cache.get(&index) {
            return Ok(Either::Right(Some(node)));
        }
    }

    // Then check if unflushed has the node
    if let Some(node) = self.unflushed.get(index) {
        if node.blank || (self.truncated && node.index >= 2 * self.truncate_to) {
            // The node is either blank or being deleted
            return if allow_miss {
                Ok(Either::Right(None))
            } else {
                Err(HypercoreError::InvalidOperation {
                    context: format!(
                        "Could not load node: {}, store {}, unflushed",
                        index,
                        Store::Tree
                    ),
                })
            };
        }
        return Ok(Either::Right(Some(node.clone())));
    }

    // Then check if it's in the incoming nodes
    if let Some(node_maybe) = nodes.get(index) {
        return match node_maybe {
            Some(node) if node.blank => {
                if allow_miss {
                    Ok(Either::Right(None))
                } else {
                    Err(HypercoreError::InvalidOperation {
                        context: format!(
                            "Could not load node: {}, store {}, blank",
                            index,
                            Store::Tree
                        ),
                    })
                }
            }
            Some(node) => Ok(Either::Right(Some(node.clone()))),
            None if allow_miss => Ok(Either::Right(None)),
            None => Err(HypercoreError::InvalidOperation {
                context: format!(
                    "Could not load node: {}, store {}, empty",
                    index,
                    Store::Tree
                ),
            }),
        };
    }

    // If not, return an instruction
    let offset = 40 * index;
    let length = 40;
    let info = if allow_miss {
        StoreInfoInstruction::new_content_allow_miss(Store::Tree, offset, length)
    } else {
        StoreInfoInstruction::new_content(Store::Tree, offset, length)
    };
    Ok(Either::Left(info))
}

/// Finds the tree index at byte offset `bytes`, scanning the full roots of a
/// tree whose head is `head`.
fn seek_from_head(
    &self,
    head: u64,
    bytes: u64,
    nodes: &IntMap<Option<Node>>,
) -> Result<Either<Vec<StoreInfoInstruction>, u64>, HypercoreError> {
    let mut instructions: Vec<StoreInfoInstruction> = Vec::new();
    let mut roots = vec![];
    flat_tree::full_roots(head, &mut roots);
    let mut bytes = bytes;

    for root in roots {
        match self.required_node(root, nodes)? {
            Either::Left(instruction) => instructions.push(instruction),
            Either::Right(node) => {
                if bytes == node.length {
                    return Ok(Either::Right(root));
                }
                if bytes > node.length {
                    bytes -= node.length;
                    continue;
                }
                // The target lies inside this root.
                return match self.seek_trusted_tree(root, bytes, nodes)? {
                    Either::Left(new_instructions) => {
                        instructions.extend(new_instructions);
                        Ok(Either::Left(instructions))
                    }
                    Either::Right(index) => Ok(Either::Right(index)),
                };
            }
        }
    }

    if instructions.is_empty() {
        Ok(Either::Right(head))
    } else {
        Ok(Either::Left(instructions))
    }
}

/// Trust that bytes are within the root tree and find the block at bytes.
fn seek_trusted_tree(
    &self,
    root: u64,
    bytes: u64,
    nodes: &IntMap<Option<Node>>,
) -> Result<Either<Vec<StoreInfoInstruction>, u64>, HypercoreError> {
    if bytes == 0 {
        return Ok(Either::Right(root));
    }
    let mut iter = flat_tree::Iterator::new(root);
    let mut instructions: Vec<StoreInfoInstruction> = Vec::new();
    let mut bytes = bytes;
    // Odd indices are parents; descend until a leaf is reached.
    while iter.index() & 1 != 0 {
        match self.optional_node(iter.left_child(), nodes)? {
            Either::Left(instruction) => {
                instructions.push(instruction);
                // Need to break immediately because it is unknown
                // if this node is the one that will match. This means
                // this function needs to be called in a loop where incoming
                // nodes increase with each call.
                break;
            }
            Either::Right(Some(node)) => {
                if node.length == bytes {
                    return Ok(Either::Right(iter.index()));
                }
                if node.length > bytes {
                    // Target is inside the left child; keep descending.
                    continue;
                }
                bytes -= node.length;
                iter.sibling();
            }
            Either::Right(None) => {
                iter.parent();
                return Ok(Either::Right(iter.index()));
            }
        }
    }
    if instructions.is_empty() {
        Ok(Either::Right(iter.index()))
    } else {
        Ok(Either::Left(instructions))
    }
}

/// Try to find the block at bytes without trusting that it *is* within the root passed.
///
/// # Errors
///
/// Returns `HypercoreError::InvalidOperation` when `bytes` lies before the
/// root's byte offset or at or beyond its end.
fn seek_untrusted_tree(
    &self,
    root: u64,
    bytes: u64,
    nodes: &IntMap<Option<Node>>,
) -> Result<Either<Vec<StoreInfoInstruction>, u64>, HypercoreError> {
    let mut instructions: Vec<StoreInfoInstruction> = Vec::new();
    let mut bytes = bytes;
    match self.byte_offset_from_nodes(root, nodes)? {
        Either::Left(new_instructions) => {
            instructions.extend(new_instructions);
        }
        Either::Right(offset) => {
            if offset > bytes {
                return Err(HypercoreError::InvalidOperation {
                    context: "Invalid seek, wrong offset".to_string(),
                });
            }
            if offset == bytes {
                return Ok(Either::Right(root));
            }
            bytes -= offset;
            match self.required_node(root, nodes)? {
                Either::Left(instruction) => instructions.push(instruction),
                Either::Right(node) => {
                    if node.length <= bytes {
                        return Err(HypercoreError::InvalidOperation {
                            context: "Invalid seek, wrong length".to_string(),
                        });
                    }
                }
            }
        }
    }
    match self.seek_trusted_tree(root, bytes, nodes)? {
        Either::Left(new_instructions) => {
            instructions.extend(new_instructions);
            Ok(Either::Left(instructions))
        }
        Either::Right(index) => Ok(Either::Right(index)),
    }
}

/// Collects the proof nodes from `indexed` up to `root` into `p.nodes`,
/// delegating to `seek_proof` for the sibling subtree containing `seek_root`.
/// Without `indexed`, only the seek proof is built.
fn block_and_seek_proof(
    &self,
    indexed: Option<&NormalizedIndexed>,
    is_seek: bool,
    seek_root: u64,
    root: u64,
    p: &mut LocalProof,
    nodes: &IntMap<Option<Node>>,
) -> Result<Either<Vec<StoreInfoInstruction>, ()>, HypercoreError> {
    let indexed = match indexed {
        Some(indexed) => indexed,
        None => return self.seek_proof(seek_root, root, p, nodes),
    };

    let mut iter = flat_tree::Iterator::new(indexed.index);
    let mut instructions: Vec<StoreInfoInstruction> = Vec::new();
    let mut p_nodes: Vec<Node> = Vec::new();

    // A hash request also needs the indexed node itself.
    if !indexed.value {
        match self.required_node(iter.index(), nodes)? {
            Either::Left(instruction) => instructions.push(instruction),
            Either::Right(node) => p_nodes.push(node),
        }
    }
    while iter.index() != root {
        iter.sibling();
        if is_seek && iter.contains(seek_root) && iter.index() != seek_root {
            if let Either::Left(new_instructions) =
                self.seek_proof(seek_root, iter.index(), p, nodes)?
            {
                instructions.extend(new_instructions);
            }
        } else {
            match self.required_node(iter.index(), nodes)? {
                Either::Left(instruction) => instructions.push(instruction),
                Either::Right(node) => p_nodes.push(node),
            }
        }

        iter.parent();
    }
    p.nodes = Some(p_nodes);
    if instructions.is_empty() {
        Ok(Either::Right(()))
    } else {
        Ok(Either::Left(instructions))
    }
}

/// Collects `seek_root` and the siblings on the path up to `root` into
/// `p.seek`.
fn seek_proof(
    &self,
    seek_root: u64,
    root: u64,
    p: &mut LocalProof,
    nodes: &IntMap<Option<Node>>,
) -> Result<Either<Vec<StoreInfoInstruction>, ()>, HypercoreError> {
    let mut iter = flat_tree::Iterator::new(seek_root);
    let mut instructions: Vec<StoreInfoInstruction> = Vec::new();
    let mut seek_nodes: Vec<Node> = Vec::new();
    match self.required_node(iter.index(), nodes)? {
        Either::Left(instruction) => instructions.push(instruction),
        Either::Right(node) => seek_nodes.push(node),
    }

    while iter.index() != root {
        iter.sibling();
        match self.required_node(iter.index(), nodes)? {
            Either::Left(instruction) => instructions.push(instruction),
            Either::Right(node) => seek_nodes.push(node),
        }
        iter.parent();
    }
    p.seek = Some(seek_nodes);
    if instructions.is_empty() {
        Ok(Either::Right(()))
    } else {
        Ok(Either::Left(instructions))
    }
}

/// Collects the nodes proving the upgrade from `from` to `to` into
/// `p.upgrade`, embedding the block/seek proof for `sub_tree` where a root
/// contains it.
#[allow(clippy::too_many_arguments)]
fn upgrade_proof(
    &self,
    indexed: Option<&NormalizedIndexed>,
    is_seek: bool,
    from: u64,
    to: u64,
    sub_tree: u64,
    p: &mut LocalProof,
    nodes: &IntMap<Option<Node>>,
) -> Result<Either<Vec<StoreInfoInstruction>, ()>, HypercoreError> {
    let mut instructions: Vec<StoreInfoInstruction> = Vec::new();
    let mut upgrade: Vec<Node> = Vec::new();
    // Starting from an empty tree there is nothing to connect to.
    let mut has_upgrade = from == 0;

    let mut iter = flat_tree::Iterator::new(0);
    let mut has_full_root = iter.full_root(to);
    while has_full_root {
        // check if they already have the node
        if iter.index() + iter.factor() / 2 < from {
            iter.next_tree();
            has_full_root = iter.full_root(to);
            continue;
        }

        // connect existing tree
        if !has_upgrade && iter.contains(from - 2) {
            has_upgrade = true;
            let root = iter.index();
            let target = from - 2;

            iter.seek(target);

            while iter.index() != root {
                iter.sibling();
                if iter.index() > target {
                    if p.nodes.is_none() && p.seek.is_none() && iter.contains(sub_tree) {
                        if let Either::Left(new_instructions) = self.block_and_seek_proof(
                            indexed,
                            is_seek,
                            sub_tree,
                            iter.index(),
                            p,
                            nodes,
                        )? {
                            instructions.extend(new_instructions);
                        }
                    } else {
                        match self.required_node(iter.index(), nodes)? {
                            Either::Left(instruction) => instructions.push(instruction),
                            Either::Right(node) => upgrade.push(node),
                        }
                    }
                }
                iter.parent();
            }

            iter.next_tree();
            has_full_root = iter.full_root(to);
            continue;
        }

        has_upgrade = true;

        // if the subtree included is a child of this tree, include that one
        // instead of a dup node
        if p.nodes.is_none() && p.seek.is_none() && iter.contains(sub_tree) {
            if let Either::Left(new_instructions) =
                self.block_and_seek_proof(indexed, is_seek, sub_tree, iter.index(), p, nodes)?
            {
                instructions.extend(new_instructions);
            }
            iter.next_tree();
            has_full_root = iter.full_root(to);
            continue;
        }

        // add root (can be optimised since the root might be in tree.roots)
        match self.required_node(iter.index(), nodes)? {
            Either::Left(instruction) => instructions.push(instruction),
            Either::Right(node) => upgrade.push(node),
        }

        iter.next_tree();
        has_full_root = iter.full_root(to);
    }

    if has_upgrade {
        p.upgrade = Some(upgrade);
    }

    if instructions.is_empty() {
        Ok(Either::Right(()))
    } else {
        Ok(Either::Left(instructions))
    }
}

/// Collects the nodes proving the range from `from` to `to` into
/// `p.additional_upgrade`.
fn additional_upgrade_proof(
    &self,
    from: u64,
    to: u64,
    p: &mut LocalProof,
    nodes: &IntMap<Option<Node>>,
) -> Result<Either<Vec<StoreInfoInstruction>, ()>, HypercoreError> {
    let mut instructions: Vec<StoreInfoInstruction> = Vec::new();
    let mut additional_upgrade: Vec<Node> = Vec::new();
    // Starting from an empty tree there is nothing to connect to.
    let mut has_additional_upgrade = from == 0;

    let mut iter = flat_tree::Iterator::new(0);
    let mut has_full_root = iter.full_root(to);
    while has_full_root {
        // check if they already have the node
        if iter.index() + iter.factor() / 2 < from {
            iter.next_tree();
            has_full_root = iter.full_root(to);
            continue;
        }

        // connect existing tree
        if !has_additional_upgrade && iter.contains(from - 2) {
            has_additional_upgrade = true;
            let root = iter.index();
            let target = from - 2;

            iter.seek(target);

            while iter.index() != root {
                iter.sibling();
                if iter.index() > target {
                    match self.required_node(iter.index(), nodes)? {
                        Either::Left(instruction) => instructions.push(instruction),
                        Either::Right(node) => additional_upgrade.push(node),
                    }
                }
                iter.parent();
            }

            iter.next_tree();
            has_full_root = iter.full_root(to);
            continue;
        }

        has_additional_upgrade = true;

        // add root (can be optimised since the root is in tree.roots)
        match self.required_node(iter.index(), nodes)? {
            Either::Left(instruction) => instructions.push(instruction),
            Either::Right(node) => additional_upgrade.push(node),
        }

        iter.next_tree();
        has_full_root = iter.full_root(to);
    }

    if has_additional_upgrade {
        p.additional_upgrade = Some(additional_upgrade);
    }

    if instructions.is_empty() {
        Ok(Either::Right(()))
    } else {
        Ok(Either::Left(instructions))
    }
}

/// Decodes incoming store infos into a map from tree index to node; infos
/// flagged as a miss map to `None`.
///
/// # Errors
///
/// Returns `HypercoreError::InvalidOperation` when a non-miss info carries no
/// data, and propagates node decoding errors.
fn infos_to_nodes(
    &mut self,
    infos: Option<&[StoreInfo]>,
) -> Result<IntMap<Option<Node>>, HypercoreError> {
    let infos = match infos {
        Some(infos) => infos,
        None => return Ok(IntMap::new()),
    };

    let mut nodes: IntMap<Option<Node>> = IntMap::with_capacity(infos.len());
    for info in infos {
        let index = index_from_info(info);
        if info.miss {
            nodes.insert(index, None);
            continue;
        }
        // A non-miss info without data is reported instead of panicking.
        let data = info
            .data
            .as_ref()
            .ok_or_else(|| HypercoreError::InvalidOperation {
                context: format!(
                    "Missing data for node {}, store {}",
                    index,
                    Store::Tree
                ),
            })?;
        let node = node_from_bytes(&index, data)?;
        #[cfg(feature = "cache")]
        if !node.blank {
            if let Some(node_cache) = &self.node_cache {
                node_cache.insert(node.index, node.clone());
            }
        }
        nodes.insert(index, Some(node));
    }
    Ok(nodes)
}
}

/// Converts a hypercore index into a merkle tree index. In the flat tree
/// representation, the leaves are in the even numbers, and the parents
/// odd. That's why we need to double the hypercore index value to get
/// the right merkle tree index.
+fn hypercore_index_into_merkle_tree_index(hypercore_index: u64) -> u64 { + 2 * hypercore_index +} + +fn verify_tree( + block: Option<&DataBlock>, + hash: Option<&DataHash>, + seek: Option<&DataSeek>, + changeset: &mut MerkleTreeChangeset, +) -> Result, HypercoreError> { + let untrusted_node: Option = normalize_data(block, hash); + + if untrusted_node.is_none() { + let no_seek = if let Some(seek) = seek.as_ref() { + seek.nodes.is_empty() + } else { + true + }; + if no_seek { + return Ok(None); + } + } + + let mut root: Option = None; + + if let Some(seek) = seek { + if !seek.nodes.is_empty() { + let mut iter = flat_tree::Iterator::new(seek.nodes[0].index); + let mut q = NodeQueue::new(seek.nodes.clone(), None); + let node = q.shift(iter.index())?; + let mut current_root: Node = node.clone(); + changeset.nodes.push(node); + while q.length > 0 { + let node = q.shift(iter.sibling())?; + let parent_node = parent_node(iter.parent(), ¤t_root, &node); + current_root = parent_node.clone(); + changeset.nodes.push(node); + changeset.nodes.push(parent_node); + } + root = Some(current_root); + } + } + + if let Some(untrusted_node) = untrusted_node { + let mut iter = flat_tree::Iterator::new(untrusted_node.index); + + let mut q = NodeQueue::new(untrusted_node.nodes, root); + let node: Node = if let Some(value) = untrusted_node.value { + block_node(iter.index(), &value) + } else { + q.shift(iter.index())? 
+ }; + let mut current_root = node.clone(); + changeset.nodes.push(node); + while q.length > 0 { + let node = q.shift(iter.sibling())?; + let parent_node = parent_node(iter.parent(), ¤t_root, &node); + current_root = parent_node.clone(); + changeset.nodes.push(node); + changeset.nodes.push(parent_node); + } + root = Some(current_root); + } + Ok(root) +} + +fn verify_upgrade( + fork: u64, + upgrade: &DataUpgrade, + block_root: Option<&Node>, + public_key: &VerifyingKey, + changeset: &mut MerkleTreeChangeset, +) -> Result { + let mut q = if let Some(block_root) = block_root { + NodeQueue::new(upgrade.nodes.clone(), Some(block_root.clone())) + } else { + NodeQueue::new(upgrade.nodes.clone(), None) + }; + let mut grow: bool = !changeset.roots.is_empty(); + let mut i: usize = 0; + let to: u64 = 2 * (upgrade.start + upgrade.length); + let mut iter = flat_tree::Iterator::new(0); + while iter.full_root(to) { + if i < changeset.roots.len() && changeset.roots[i].index == iter.index() { + i += 1; + iter.next_tree(); + continue; + } + if grow { + grow = false; + let root_index = iter.index(); + if i < changeset.roots.len() { + iter.seek(changeset.roots[changeset.roots.len() - 1].index); + while iter.index() != root_index { + changeset.append_root(q.shift(iter.sibling())?, &mut iter); + } + iter.next_tree(); + continue; + } + } + changeset.append_root(q.shift(iter.index())?, &mut iter); + iter.next_tree(); + } + let extra = &upgrade.additional_nodes; + + iter.seek(changeset.roots[changeset.roots.len() - 1].index); + i = 0; + + while i < extra.len() && extra[i].index == iter.sibling() { + changeset.append_root(extra[i].clone(), &mut iter); + i += 1; + } + + while i < extra.len() { + let node = extra[i].clone(); + i += 1; + while node.index != iter.index() { + if iter.factor() == 2 { + return Err(HypercoreError::InvalidOperation { + context: format!("Unexpected node: {}, store: {}", node.index, Store::Tree), + }); + } + iter.left_child(); + } + changeset.append_root(node, &mut 
iter); + iter.sibling(); + } + changeset.fork = fork; + changeset.verify_and_set_signature(&upgrade.signature, public_key)?; + Ok(q.extra.is_none()) +} + +fn get_root_indices(header_tree_length: &u64) -> Vec { + let mut roots = vec![]; + flat_tree::full_roots(header_tree_length * 2, &mut roots); + roots +} + +fn index_from_info(info: &StoreInfo) -> u64 { + info.index / NODE_SIZE +} + +fn node_from_bytes(index: &u64, data: &[u8]) -> Result { + let len_buf = &data[..8]; + let hash = &data[8..]; + let mut state = State::from_buffer(len_buf); + let len = state.decode_u64(len_buf)?; + Ok(Node::new(*index, hash.to_vec(), len)) +} + +#[derive(Debug, Copy, Clone)] +struct NormalizedIndexed { + value: bool, + index: u64, + nodes: u64, + last_index: u64, +} + +fn normalize_indexed( + block: Option<&RequestBlock>, + hash: Option<&RequestBlock>, +) -> Option { + if let Some(block) = block { + Some(NormalizedIndexed { + value: true, + index: block.index * 2, + nodes: block.nodes, + last_index: block.index, + }) + } else { + hash.map(|hash| NormalizedIndexed { + value: false, + index: hash.index, + nodes: hash.nodes, + last_index: flat_tree::right_span(hash.index) / 2, + }) + } +} + +#[derive(Debug, Clone)] +struct NormalizedData { + value: Option>, + index: u64, + nodes: Vec, +} + +fn normalize_data(block: Option<&DataBlock>, hash: Option<&DataHash>) -> Option { + if let Some(block) = block { + Some(NormalizedData { + value: Some(block.value.clone()), + index: block.index * 2, + nodes: block.nodes.clone(), + }) + } else { + hash.map(|hash| NormalizedData { + value: None, + index: hash.index, + nodes: hash.nodes.clone(), + }) + } +} + +/// Struct to use for local building of proof +#[derive(Debug, Clone)] +struct LocalProof { + seek: Option>, + nodes: Option>, + upgrade: Option>, + additional_upgrade: Option>, +} + +fn nodes_to_root(index: u64, nodes: u64, head: u64) -> Result { + let mut iter = flat_tree::Iterator::new(index); + for _ in 0..nodes { + iter.parent(); + if 
iter.contains(head) { + return Err(HypercoreError::InvalidOperation { + context: format!( + "Nodes is out of bounds, index: {index}, nodes: {nodes}, head {head}" + ), + }); + } + } + Ok(iter.index()) +} + +fn parent_node(index: u64, left: &Node, right: &Node) -> Node { + Node::new( + index, + Hash::parent(left, right).as_bytes().to_vec(), + left.length + right.length, + ) +} + +fn block_node(index: u64, value: &Vec) -> Node { + Node::new( + index, + Hash::data(value).as_bytes().to_vec(), + value.len() as u64, + ) +} + +/// Node queue +struct NodeQueue { + i: usize, + nodes: Vec, + extra: Option, + length: usize, +} +impl NodeQueue { + fn new(nodes: Vec, extra: Option) -> Self { + let length = nodes.len() + if extra.is_some() { 1 } else { 0 }; + Self { + i: 0, + nodes, + extra, + length, + } + } + fn shift(&mut self, index: u64) -> Result { + if let Some(extra) = self.extra.take() { + if extra.index == index { + self.length -= 1; + return Ok(extra); + } else { + self.extra = Some(extra); + } + } + if self.i >= self.nodes.len() { + return Err(HypercoreError::InvalidOperation { + context: format!("Expected node {index}, got (nil)"), + }); + } + let node = self.nodes[self.i].clone(); + self.i += 1; + if node.index != index { + return Err(HypercoreError::InvalidOperation { + context: format!("Expected node {index}, got node {}", node.index), + }); + } + self.length -= 1; + Ok(node) + } +} diff --git a/src/tree/merkle_tree_changeset.rs b/src/tree/merkle_tree_changeset.rs new file mode 100644 index 00000000..be28873f --- /dev/null +++ b/src/tree/merkle_tree_changeset.rs @@ -0,0 +1,131 @@ +use ed25519_dalek::{Signature, SigningKey, VerifyingKey}; +use std::convert::TryFrom; + +use crate::{ + crypto::{signable_tree, verify, Hash}, + sign, HypercoreError, Node, +}; + +/// Changeset for a `MerkleTree`. 
This allows to incrementally change a `MerkleTree` in two steps: +/// first create the changes to this changeset, get out information from this to put to the oplog, +/// and the commit the changeset to the tree. +/// +/// This is called "MerkleTreeBatch" in Javascript, see: +/// https://github.com/hypercore-protocol/hypercore/blob/master/lib/merkle-tree.js +#[derive(Debug)] +pub(crate) struct MerkleTreeChangeset { + pub(crate) length: u64, + pub(crate) ancestors: u64, + pub(crate) byte_length: u64, + pub(crate) batch_length: u64, + pub(crate) fork: u64, + pub(crate) roots: Vec, + pub(crate) nodes: Vec, + pub(crate) hash: Option>, + pub(crate) signature: Option, + pub(crate) upgraded: bool, + + // Safeguarding values + pub(crate) original_tree_length: u64, + pub(crate) original_tree_fork: u64, +} + +impl MerkleTreeChangeset { + pub(crate) fn new( + length: u64, + byte_length: u64, + fork: u64, + roots: Vec, + ) -> MerkleTreeChangeset { + Self { + length, + ancestors: length, + byte_length, + batch_length: 0, + fork, + roots, + nodes: vec![], + hash: None, + signature: None, + upgraded: false, + original_tree_length: length, + original_tree_fork: fork, + } + } + + pub(crate) fn append(&mut self, data: &[u8]) -> usize { + let len = data.len(); + let head = self.length * 2; + let mut iter = flat_tree::Iterator::new(head); + let node = Node::new(head, Hash::data(data).as_bytes().to_vec(), len as u64); + self.append_root(node, &mut iter); + self.batch_length += 1; + len + } + + pub(crate) fn append_root(&mut self, node: Node, iter: &mut flat_tree::Iterator) { + self.upgraded = true; + self.length += iter.factor() / 2; + self.byte_length += node.length; + self.roots.push(node.clone()); + self.nodes.push(node); + + while self.roots.len() > 1 { + let a = &self.roots[self.roots.len() - 1]; + let b = &self.roots[self.roots.len() - 2]; + if iter.sibling() != b.index { + iter.sibling(); // unset so it always points to last root + break; + } + + let node = Node::new( + 
iter.parent(), + Hash::parent(a, b).as_bytes().into(), + a.length + b.length, + ); + let _ = &self.nodes.push(node.clone()); + let _ = &self.roots.pop(); + let _ = &self.roots.pop(); + let _ = &self.roots.push(node); + } + } + + /// Hashes and signs the changeset + pub(crate) fn hash_and_sign(&mut self, signing_key: &SigningKey) { + let hash = self.hash(); + let signable = self.signable(&hash); + let signature = sign(signing_key, &signable); + self.hash = Some(hash); + self.signature = Some(signature); + } + + /// Verify and set signature with given public key + pub(crate) fn verify_and_set_signature( + &mut self, + signature: &[u8], + public_key: &VerifyingKey, + ) -> Result<(), HypercoreError> { + // Verify that the received signature matches the public key + let signature = + Signature::try_from(signature).map_err(|_| HypercoreError::InvalidSignature { + context: "Could not parse signature".to_string(), + })?; + let hash = self.hash(); + verify(public_key, &self.signable(&hash), Some(&signature))?; + + // Set values to changeset + self.hash = Some(hash); + self.signature = Some(signature); + Ok(()) + } + + /// Calculates a hash of the current set of roots + pub(crate) fn hash(&self) -> Box<[u8]> { + Hash::tree(&self.roots).as_bytes().into() + } + + /// Creates a signable slice from given hash + pub(crate) fn signable(&self, hash: &[u8]) -> Box<[u8]> { + signable_tree(hash, self.length, self.fork) + } +} diff --git a/src/tree/mod.rs b/src/tree/mod.rs new file mode 100644 index 00000000..02367a2a --- /dev/null +++ b/src/tree/mod.rs @@ -0,0 +1,5 @@ +mod merkle_tree; +mod merkle_tree_changeset; + +pub(crate) use merkle_tree::MerkleTree; +pub(crate) use merkle_tree_changeset::MerkleTreeChangeset; diff --git a/tests/bitfield.rs b/tests/bitfield.rs deleted file mode 100644 index 566ac26e..00000000 --- a/tests/bitfield.rs +++ /dev/null @@ -1,195 +0,0 @@ -use rand; - -use hypercore::bitfield::{Bitfield, Change::*}; -use rand::Rng; - -#[test] -fn set_and_get() { - let 
(mut b, _) = Bitfield::new(); - - assert_eq!(b.get(0), false); - assert_eq!(b.set(0, true), Changed); - assert_eq!(b.set(0, true), Unchanged); - assert_eq!(b.get(0), true); - - assert_eq!(b.get(1_424_244), false); - assert_eq!(b.set(1_424_244, true), Changed); - assert_eq!(b.set(1_424_244, true), Unchanged); - assert_eq!(b.get(1_424_244), true); -} - -#[test] -fn set_and_get_tree() { - let (mut b, mut tree) = Bitfield::new(); - - { - assert_eq!(tree.get(0), false); - assert_eq!(tree.set(0, true), Changed); - assert_eq!(tree.set(0, true), Unchanged); - assert_eq!(tree.get(0), true); - - assert_eq!(tree.get(1_424_244), false); - assert_eq!(tree.set(1_424_244, true), Changed); - assert_eq!(tree.set(1_424_244, true), Unchanged); - assert_eq!(tree.get(1_424_244), true); - } - - assert_eq!(b.get(0), false); - assert_eq!(b.get(1_424_244), false); -} - -#[test] -fn set_and_index() { - let (mut b, _) = Bitfield::new(); - - { - let mut iter = b.iterator_with_range(0, 100_000_000); - assert_eq!(iter.next(), Some(0)); - } - - b.set(0, true); - { - let mut iter = b.iterator_with_range(0, 100_000_000); - assert_eq!(iter.seek(0).next(), Some(1)); - } - - b.set(479, true); - { - let mut iter = b.iterator_with_range(0, 100_000_000); - assert_eq!(iter.seek(478).next(), Some(478)); - assert_eq!(iter.next(), Some(480)); - } - - b.set(1, true); - { - let mut iter = b.iterator_with_range(0, 100_000_000); - assert_eq!(iter.seek(0).next(), Some(2)); - } - - b.set(2, true); - { - let mut iter = b.iterator_with_range(0, 100_000_000); - assert_eq!(iter.seek(0).next(), Some(3)); - } - - b.set(3, true); - { - let mut iter = b.iterator_with_range(0, 100_000_000); - assert_eq!(iter.seek(0).next(), Some(4)); - } - - let len = b.len(); - for i in 0..len { - b.set(i, true); - } - { - let mut iter = b.iterator_with_range(0, 100_000_000); - assert_eq!(iter.seek(0).next(), Some(len)); - } - - for i in 0..len { - b.set(i, false); - } - { - let mut iter = b.iterator_with_range(0, 100_000_000); - 
assert_eq!(iter.seek(0).next(), Some(0)); - } -} - -#[test] -fn set_and_index_random() { - let (mut b, _) = Bitfield::new(); - - let mut rng = rand::thread_rng(); - for _ in 0..100 { - assert!(check(&mut b), "index validates"); - set(&mut b, rng.gen_range(0, 2000), rng.gen_range(0, 8)); - } - - assert!(check(&mut b), "index validates"); - - fn check(b: &mut Bitfield) -> bool { - let mut all = vec![true; b.len() as usize]; - - { - let mut iter = b.iterator(); - - while let Some(i) = iter.next() { - all[i as usize] = false; - } - } - - for (i, &v) in all.iter().enumerate() { - if b.get(i as u64) != v { - return false; - } - } - - true - } - - fn set(b: &mut Bitfield, i: u64, n: u64) { - for j in i..i + n { - b.set(j, true); - } - } -} - -#[test] -fn get_total_positive_bits() { - let (mut b, _) = Bitfield::new(); - - assert_eq!(b.set(1, true), Changed); - assert_eq!(b.set(2, true), Changed); - assert_eq!(b.set(4, true), Changed); - assert_eq!(b.set(5, true), Changed); - assert_eq!(b.set(39, true), Changed); - - assert_eq!(b.total_with_range(0..4), 2); - assert_eq!(b.total_with_range(3..4), 0); - assert_eq!(b.total_with_range(3..5), 1); - assert_eq!(b.total_with_range(3..40), 3); - assert_eq!(b.total(), 5); - assert_eq!(b.total_with_start(7), 1); -} - -#[test] -fn bitfield_dedup() { - let (mut b, mut tree) = Bitfield::new(); - - for i in 0..32 * 1024 { - b.set(i, true); - } - - for i in 0..64 * 1024 { - tree.set(i, true); - } - - assert!(b.get(8 * 1024)); - assert!(b.get(16 * 1024)); - b.set(8 * 1024, false); - assert!(!b.get(8 * 1024)); - assert!(b.get(16 * 1024)); -} - -#[test] -fn bitfield_compress() { - let (mut b, _) = Bitfield::new(); - assert_eq!(b.compress(0, 0).unwrap(), vec![0]); - - b.set(1, true); - assert_eq!(b.compress(0, 0).unwrap(), vec![2, 64, 253, 31]); - - b.set(1_424_244, true); - assert_eq!( - b.compress(0, 0).unwrap(), - vec![2, 64, 181, 187, 43, 2, 8, 197, 4] - ); - assert_eq!(b.compress(0, 1).unwrap(), vec![2, 64, 253, 31]); - assert_eq!( - 
b.compress(1_424_244, 1).unwrap(), - vec![185, 27, 2, 8, 197, 4] - ); - - assert_eq!(b.compress(1_424_244_000, 1).unwrap(), vec![0]); -} diff --git a/tests/common/mod.rs b/tests/common/mod.rs index fa43ddb0..fbe8616c 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -1,12 +1,102 @@ -use hypercore; +use anyhow::Result; +use ed25519_dalek::{SigningKey, VerifyingKey, PUBLIC_KEY_LENGTH, SECRET_KEY_LENGTH}; +use random_access_disk::RandomAccessDisk; +use sha2::{Digest, Sha256}; +use std::io::prelude::*; +use std::path::Path; -use anyhow::Error; -use futures::future::FutureExt; -use hypercore::{Feed, Storage, Store}; -use random_access_memory as ram; +use hypercore::{Hypercore, HypercoreBuilder, PartialKeypair, Storage}; -pub async fn create_feed(page_size: usize) -> Result, Error> { - let create = |_store: Store| async move { Ok(ram::RandomAccessMemory::new(page_size)) }.boxed(); - let storage = Storage::new(create, false).await?; - Feed::with_storage(storage).await +const TEST_PUBLIC_KEY_BYTES: [u8; PUBLIC_KEY_LENGTH] = [ + 0x97, 0x60, 0x6c, 0xaa, 0xd2, 0xb0, 0x8c, 0x1d, 0x5f, 0xe1, 0x64, 0x2e, 0xee, 0xa5, 0x62, 0xcb, + 0x91, 0xd6, 0x55, 0xe2, 0x00, 0xc8, 0xd4, 0x3a, 0x32, 0x09, 0x1d, 0x06, 0x4a, 0x33, 0x1e, 0xe3, +]; +// NB: In the javascript version this is 64 bytes, but that's because sodium appends the the public +// key after the secret key for some reason. Only the first 32 bytes are actually used in +// javascript side too for signing. 
+const TEST_SECRET_KEY_BYTES: [u8; SECRET_KEY_LENGTH] = [ + 0x27, 0xe6, 0x74, 0x25, 0xc1, 0xff, 0xd1, 0xd9, 0xee, 0x62, 0x5c, 0x96, 0x2b, 0x57, 0x13, 0xc3, + 0x51, 0x0b, 0x71, 0x14, 0x15, 0xf3, 0x31, 0xf6, 0xfa, 0x9e, 0xf2, 0xbf, 0x23, 0x5f, 0x2f, 0xfe, +]; + +#[derive(PartialEq, Debug)] +pub struct HypercoreHash { + pub bitfield: Option, + pub data: Option, + pub oplog: Option, + pub tree: Option, +} + +pub fn get_test_key_pair() -> PartialKeypair { + let public = VerifyingKey::from_bytes(&TEST_PUBLIC_KEY_BYTES).unwrap(); + let signing_key = SigningKey::from_bytes(&TEST_SECRET_KEY_BYTES); + assert_eq!(public.to_bytes(), signing_key.verifying_key().to_bytes()); + let secret = Some(signing_key); + PartialKeypair { public, secret } +} + +pub async fn create_hypercore(work_dir: &str) -> Result> { + let path = Path::new(work_dir).to_owned(); + let key_pair = get_test_key_pair(); + let storage = Storage::new_disk(&path, true).await?; + Ok(HypercoreBuilder::new(storage) + .key_pair(key_pair) + .build() + .await?) +} + +pub async fn open_hypercore(work_dir: &str) -> Result> { + let path = Path::new(work_dir).to_owned(); + let storage = Storage::new_disk(&path, false).await?; + Ok(HypercoreBuilder::new(storage).open(true).build().await?) 
+} + +pub fn create_hypercore_hash(dir: &str) -> HypercoreHash { + let bitfield = hash_file(format!("{dir}/bitfield")); + let data = hash_file(format!("{dir}/data")); + let oplog = hash_file(format!("{dir}/oplog")); + let tree = hash_file(format!("{dir}/tree")); + HypercoreHash { + bitfield, + data, + oplog, + tree, + } +} + +pub fn hash_file(file: String) -> Option { + let path = std::path::Path::new(&file); + if !path.exists() { + None + } else { + let mut hasher = Sha256::new(); + let mut file = std::fs::File::open(path).unwrap(); + std::io::copy(&mut file, &mut hasher).unwrap(); + let hash_bytes = hasher.finalize(); + let hash = format!("{hash_bytes:X}"); + // Empty file has this hash, don't make a difference between missing and empty file. Rust + // is much easier and performant to write if the empty file is created. + if hash == *"E3B0C44298FC1C149AFBF4C8996FB92427AE41E4649B934CA495991B7852B855" { + None + } else { + Some(format!("{hash_bytes:X}")) + } + } +} + +pub fn storage_contains_data(dir: &Path, data: &[u8]) -> bool { + for file_name in ["bitfield", "data", "oplog", "tree"] { + let file_path = dir.join(file_name); + let mut file = std::fs::File::open(file_path).unwrap(); + let mut buffer = Vec::new(); + file.read_to_end(&mut buffer).unwrap(); + if is_sub(&buffer, data) { + return true; + } + } + false +} + +fn is_sub(haystack: &[T], needle: &[T]) -> bool { + haystack.windows(needle.len()).any(|c| c == needle) } diff --git a/tests/compat.rs b/tests/compat.rs deleted file mode 100644 index 93b6ec34..00000000 --- a/tests/compat.rs +++ /dev/null @@ -1,179 +0,0 @@ -//! 
Based on https://github.com/mafintosh/hypercore/blob/cf08d8c907e302cf4b699738f229b050eba41b59/test/compat.js - -use ed25519_dalek; - -use tempfile; - -use std::convert::TryFrom; -use std::fs::File; -use std::io::Read; -use std::path::{Path, PathBuf}; - -use data_encoding::HEXLOWER; -use ed25519_dalek::{Keypair, Signature}; -use hypercore::Feed; -use hypercore::{Storage, Store}; -use random_access_disk::RandomAccessDisk; -use remove_dir_all::remove_dir_all; - -#[async_std::test] -async fn deterministic_data_and_tree() { - let expected_tree = hex_bytes(concat!( - "0502570200002807424c414b4532620000000000000000000000000000000000ab27d45f509274", - "ce0d08f4f09ba2d0e0d8df61a0c2a78932e81b5ef26ef398df0000000000000001064321a8413b", - "e8c604599689e2c7a59367b031b598bceeeb16556a8f3252e0de000000000000000294c1705400", - "5942a002c7c39fbb9c6183518691fb401436f1a2f329b380230af800000000000000018dfe81d5", - "76464773f848b9aba1c886fde57a49c283ab57f4a297d976d986651e00000000000000041d2fad", - "c9ce604c7e592949edc964e45aaa10990d7ee53328439ef9b2cf8aa6ff00000000000000013a8d", - "cc74e80b8314e8e13e1e462358cf58cf5fc4413a9b18a891ffacc551c395000000000000000228", - "28647a654a712738e35f49d1c05c676010be0b33882affc1d1e7e9fee59d400000000000000001", - "000000000000000000000000000000000000000000000000000000000000000000000000000000", - "00baac70b6d38243efa028ee977c462e4bec73d21d09ceb8cc16f4d4b1ee228a45000000000000", - "0001d1b021632c7fab84544053379112ca7b165bb21283821816c5b6c89ff7f78e2d0000000000", - "000002d2ab421cece792033058787a5ba72f3a701fddc25540d5924e9819d7c12e02f200000000", - "00000001" - )); - - for _ in 0..5 { - let (dir, storage) = mk_storage().await; - let mut feed = Feed::with_storage(storage).await.unwrap(); - - let data = b"abcdef"; - for &b in data { - feed.append(&[b]).await.unwrap(); - } - assert_eq!(read_bytes(&dir, Store::Data), data); - assert_eq!(read_bytes(&dir, Store::Tree), expected_tree); - - remove_dir_all(dir).unwrap() - } -} - -#[test] -#[ignore] -fn 
deterministic_data_and_tree_after_replication() { - // Port from mafintosh/hypercore when the necessary features are implemented - unimplemented!(); -} - -#[async_std::test] -async fn deterministic_signatures() { - let key = hex_bytes("9718a1ff1c4ca79feac551c0c7212a65e4091278ec886b88be01ee4039682238"); - let keypair_bytes = hex_bytes(concat!( - "53729c0311846cca9cc0eded07aaf9e6689705b6a0b1bb8c3a2a839b72fda383", - "9718a1ff1c4ca79feac551c0c7212a65e4091278ec886b88be01ee4039682238" - )); - - let compat_v9_expected_signatures = hex_bytes(concat!( - "050257010000400745643235353139000000000000000000000000000000000084684e8dd76c339", - "d6f5754e813204906ee818e6c6cdc6a816a2ac785a3e0d926ac08641a904013194fe6121847b7da", - "d4e361965d47715428eb0a0ededbdd5909d037ff3c3614fa0100ed9264a712d3b77cbe7a4f6eadd", - "8f342809be99dfb9154a19e278d7a5de7d2b4d890f7701a38b006469f6bab1aff66ac6125d48baf", - "dc0711057675ed57d445ce7ed4613881be37ebc56bb40556b822e431bb4dc3517421f9a5e3ed124", - "eb5c4db8367386d9ce12b2408613b9fec2837022772a635ffd807", - )); - let compat_signatures_len = compat_v9_expected_signatures.len(); - let compat_signature_struct = compat_v9_expected_signatures - .into_iter() - .skip(compat_signatures_len - 64) - .collect::>(); - - let expected_signatures = hex_bytes(concat!( - "42e057f2c225b4c5b97876a15959324931ad84646a8bf2e4d14487c0f117966a585edcdda54670d", - "d5def829ca85924ce44ae307835e57d5729aef8cd91678b06", - )); - - for _ in 0..5 { - let (dir, storage) = mk_storage().await; - let keypair = mk_keypair(&keypair_bytes, &key); - let mut feed = Feed::builder(keypair.public, storage) - .secret_key(keypair.secret) - .build() - .await - .unwrap(); - - let data = b"abc"; - for &b in data { - feed.append(&[b]).await.unwrap(); - } - - assert_eq!(read_bytes(&dir, Store::Data), data); - let actual_signatures = read_bytes(&dir, Store::Signatures); - let actual_signatures_len = actual_signatures.len(); - assert_eq!( - actual_signatures - .into_iter() - .skip(actual_signatures_len - 
64) - .collect::>(), - expected_signatures - ); - - let compat_signature = Signature::try_from(&compat_signature_struct[..]).unwrap(); - feed.verify(feed.len() - 1, &compat_signature) - .await - .expect("Could not verify compat signature of hypercore v9"); - - remove_dir_all(dir).unwrap() - } -} - -#[test] -#[ignore] -fn compat_signatures_work() { - // Port from mafintosh/hypercore when the necessary features are implemented - unimplemented!(); -} - -#[test] -#[ignore] -fn deterministic_signatures_after_replication() { - // Port from mafintosh/hypercore when the necessary features are implemented - unimplemented!(); -} - -fn hex_bytes(hex: &str) -> Vec { - HEXLOWER.decode(hex.as_bytes()).unwrap() -} - -fn storage_path>(dir: P, s: Store) -> PathBuf { - let filename = match s { - Store::Tree => "tree", - Store::Data => "data", - Store::Bitfield => "bitfield", - Store::Signatures => "signatures", - Store::Keypair => "key", - }; - dir.as_ref().join(filename) -} - -async fn mk_storage() -> (PathBuf, Storage) { - let temp_dir = tempfile::tempdir().unwrap(); - let dir = temp_dir.into_path(); - let storage = Storage::new( - |s| { - let dir = dir.clone(); - Box::pin(async move { RandomAccessDisk::open(storage_path(dir, s)).await }) - }, - false, - ) - .await - .unwrap(); - (dir, storage) -} - -fn read_bytes>(dir: P, s: Store) -> Vec { - let mut f = File::open(storage_path(dir, s)).unwrap(); - let mut bytes = Vec::new(); - f.read_to_end(&mut bytes).unwrap(); - bytes -} - -fn mk_keypair(keypair_bytes: &[u8], public_key: &[u8]) -> Keypair { - let keypair = Keypair::from_bytes(&keypair_bytes).unwrap(); - assert_eq!( - keypair.secret.as_bytes().as_ref(), - &keypair_bytes[..ed25519_dalek::SECRET_KEY_LENGTH] - ); - assert_eq!(keypair.public.as_bytes().as_ref(), public_key); - keypair -} diff --git a/tests/core.rs b/tests/core.rs new file mode 100644 index 00000000..f3e8d2ec --- /dev/null +++ b/tests/core.rs @@ -0,0 +1,79 @@ +pub mod common; + +use anyhow::Result; +use 
common::{create_hypercore, get_test_key_pair, open_hypercore, storage_contains_data}; +use hypercore::{HypercoreBuilder, Storage}; +use tempfile::Builder; +use test_log::test; + +#[cfg(feature = "async-std")] +use async_std::test as async_test; +#[cfg(feature = "tokio")] +use tokio::test as async_test; + +#[test(async_test)] +async fn hypercore_new() -> Result<()> { + let storage = Storage::new_memory().await?; + let _hypercore = HypercoreBuilder::new(storage).build(); + Ok(()) +} + +#[test(async_test)] +async fn hypercore_new_with_key_pair() -> Result<()> { + let storage = Storage::new_memory().await?; + let key_pair = get_test_key_pair(); + let _hypercore = HypercoreBuilder::new(storage) + .key_pair(key_pair) + .build() + .await?; + Ok(()) +} + +#[test(async_test)] +async fn hypercore_open_with_key_pair_error() -> Result<()> { + let storage = Storage::new_memory().await?; + let key_pair = get_test_key_pair(); + assert!(HypercoreBuilder::new(storage) + .key_pair(key_pair) + .open(true) + .build() + .await + .is_err()); + Ok(()) +} + +#[test(async_test)] +async fn hypercore_make_read_only() -> Result<()> { + let dir = Builder::new() + .prefix("hypercore_make_read_only") + .tempdir() + .unwrap(); + let write_key_pair = { + let mut hypercore = create_hypercore(&dir.path().to_string_lossy()).await?; + hypercore.append(b"Hello").await?; + hypercore.append(b"World!").await?; + hypercore.key_pair().clone() + }; + assert!(storage_contains_data( + dir.path(), + &write_key_pair.secret.as_ref().unwrap().to_bytes() + )); + assert!(write_key_pair.secret.is_some()); + let read_key_pair = { + let mut hypercore = open_hypercore(&dir.path().to_string_lossy()).await?; + assert_eq!(&hypercore.get(0).await?.unwrap(), b"Hello"); + assert_eq!(&hypercore.get(1).await?.unwrap(), b"World!"); + assert!(hypercore.make_read_only().await?); + hypercore.key_pair().clone() + }; + assert!(read_key_pair.secret.is_none()); + assert!(!storage_contains_data( + dir.path(), + 
&write_key_pair.secret.as_ref().unwrap().to_bytes()[16..], + )); + + let mut hypercore = open_hypercore(&dir.path().to_string_lossy()).await?; + assert_eq!(&hypercore.get(0).await?.unwrap(), b"Hello"); + assert_eq!(&hypercore.get(1).await?.unwrap(), b"World!"); + Ok(()) +} diff --git a/tests/feed.rs b/tests/feed.rs deleted file mode 100644 index 3bb1828e..00000000 --- a/tests/feed.rs +++ /dev/null @@ -1,340 +0,0 @@ -extern crate random_access_memory as ram; - -mod common; - -use common::create_feed; -use hypercore::{generate_keypair, Feed, NodeTrait, PublicKey, SecretKey, Storage}; -use random_access_storage::RandomAccess; -use std::env::temp_dir; -use std::fmt::Debug; -use std::fs; -use std::io::Write; - -#[async_std::test] -async fn create_with_key() { - let keypair = generate_keypair(); - let storage = Storage::new_memory().await.unwrap(); - let _feed = Feed::builder(keypair.public, storage) - .secret_key(keypair.secret) - .build() - .await - .unwrap(); -} - -#[async_std::test] -async fn display() { - let feed = create_feed(50).await.unwrap(); - let output = format!("{}", feed); - assert_eq!(output.len(), 61); -} - -#[async_std::test] -/// Verify `.append()` and `.get()` work. 
-async fn set_get() { - let mut feed = create_feed(50).await.unwrap(); - feed.append(b"hello").await.unwrap(); - feed.append(b"world").await.unwrap(); - - assert_eq!(feed.get(0).await.unwrap(), Some(b"hello".to_vec())); - assert_eq!(feed.get(1).await.unwrap(), Some(b"world".to_vec())); -} - -#[async_std::test] -async fn append() { - let mut feed = create_feed(50).await.unwrap(); - feed.append(br#"{"hello":"world"}"#).await.unwrap(); - feed.append(br#"{"hello":"mundo"}"#).await.unwrap(); - feed.append(br#"{"hello":"welt"}"#).await.unwrap(); - - assert_eq!(feed.len(), 3); - assert_eq!(feed.byte_len(), 50); - - assert_eq!( - feed.get(0).await.unwrap(), - Some(br#"{"hello":"world"}"#.to_vec()) - ); - assert_eq!( - feed.get(1).await.unwrap(), - Some(br#"{"hello":"mundo"}"#.to_vec()) - ); - assert_eq!( - feed.get(2).await.unwrap(), - Some(br#"{"hello":"welt"}"#.to_vec()) - ); -} - -#[async_std::test] -/// Verify the `.root_hashes()` method returns the right nodes. -async fn root_hashes() { - // If no roots exist we should get an error. - let mut feed = create_feed(50).await.unwrap(); - let res = feed.root_hashes(0).await; - assert!(res.is_err()); - - // If 1 entry exists, [0] should be the root. - feed.append(b"data").await.unwrap(); - let roots = feed.root_hashes(0).await.unwrap(); - assert_eq!(roots.len(), 1); - assert_eq!(roots[0].index(), 0); - - // If we query out of bounds, we should get an error. - let res = feed.root_hashes(6).await; - assert!(res.is_err()); - - // If 3 entries exist, [2,4] should be the roots. 
- feed.append(b"data").await.unwrap(); - feed.append(b"data").await.unwrap(); - let roots = feed.root_hashes(2).await.unwrap(); - assert_eq!(roots.len(), 2); - assert_eq!(roots[0].index(), 1); - assert_eq!(roots[1].index(), 4); -} - -#[async_std::test] -async fn verify() { - let mut feed = create_feed(50).await.unwrap(); - let (public, secret) = copy_keys(&feed); - let feed_bytes = secret.to_bytes().to_vec(); - let storage = Storage::new( - |_| Box::pin(async { Ok(ram::RandomAccessMemory::new(50)) }), - false, - ) - .await - .unwrap(); - let mut evil_feed = Feed::builder(public, storage) - .secret_key(secret) - .build() - .await - .unwrap(); - - let evil_bytes = match &feed.secret_key() { - Some(key) => key.to_bytes(), - None => panic!("no secret key found"), - }; - - // Verify the keys are the same. - assert_eq!(&feed_bytes, &evil_bytes.to_vec()); - - // Verify that the signature on a single feed is correct. - feed.append(b"test").await.unwrap(); - let sig = feed.signature(0).await.unwrap(); - feed.verify(0, &sig).await.unwrap(); - - // Verify that the signature between two different feeds is different. - evil_feed.append(b"t0st").await.unwrap(); - let res = evil_feed.verify(0, &sig).await; - assert!(res.is_err()); -} - -#[async_std::test] -async fn put() { - let mut a = create_feed(50).await.unwrap(); - let (public, secret) = copy_keys(&a); - let storage = Storage::new( - |_| Box::pin(async { Ok(ram::RandomAccessMemory::new(50)) }), - false, - ) - .await - .unwrap(); - let mut b = Feed::builder(public, storage) - .secret_key(secret) - .build() - .await - .unwrap(); - - for _ in 0..10u8 { - a.append(b"foo").await.unwrap(); - } - - let proof = a.proof(0, true).await.unwrap(); - b.put(0, None, proof).await.expect("no error"); - let proof = a - .proof_with_digest(4, b.digest(4), true) - .await - .expect(".proof() index 4, digest 4"); - b.put(4, None, proof).await.unwrap(); -} - -#[async_std::test] -/// Put data from one feed into another, while veryfing hashes. 
-/// I.e. manual replication between two feeds. -async fn put_with_data() { - // Create a writable feed. - let mut a = create_feed(50).await.unwrap(); - - // Create a second feed with the first feed's key. - let (public, secret) = copy_keys(&a); - let storage = Storage::new_memory().await.unwrap(); - let mut b = Feed::builder(public, storage) - .secret_key(secret) - .build() - .await - .unwrap(); - - // Append 4 blocks of data to the writable feed. - a.append(b"hi").await.unwrap(); - a.append(b"ola").await.unwrap(); - a.append(b"ahoj").await.unwrap(); - a.append(b"salut").await.unwrap(); - - for i in 0..4 { - // Generate a proof for the index. - // The `include_hash` argument has to be set to false. - let a_proof = a.proof(i, false).await.unwrap(); - // Get the data for the index. - let a_data = a.get(i).await.unwrap(); - - // Put the data into the other hypercore. - b.put(i, a_data.as_deref(), a_proof.clone()).await.unwrap(); - - // Load the data we've put. - let b_data = b.get(i).await.unwrap(); - - // Assert the data was put correctly. - assert!(a_data == b_data, "Data correct"); - } -} - -#[async_std::test] -async fn create_with_storage() { - let storage = Storage::new_memory().await.unwrap(); - assert!( - Feed::with_storage(storage).await.is_ok(), - "Could not create a feed with a storage." - ); -} - -#[async_std::test] -async fn create_with_stored_public_key() { - let mut storage = Storage::new_memory().await.unwrap(); - let keypair = generate_keypair(); - storage.write_public_key(&keypair.public).await.unwrap(); - assert!( - Feed::with_storage(storage).await.is_ok(), - "Could not create a feed with a stored public key." 
- ); -} - -#[async_std::test] -async fn create_with_stored_keys() { - let mut storage = Storage::new_memory().await.unwrap(); - let keypair = generate_keypair(); - storage.write_public_key(&keypair.public).await.unwrap(); - storage.write_secret_key(&keypair.secret).await.unwrap(); - assert!( - Feed::with_storage(storage).await.is_ok(), - "Could not create a feed with a stored keypair." - ); -} - -fn copy_keys( - feed: &Feed> + Debug + Send>, -) -> (PublicKey, SecretKey) { - match &feed.secret_key() { - Some(secret) => { - let secret = secret.to_bytes(); - let public = &feed.public_key().to_bytes(); - - let public = PublicKey::from_bytes(public).unwrap(); - let secret = SecretKey::from_bytes(&secret).unwrap(); - - (public, secret) - } - _ => panic!(": Could not access secret key"), - } -} - -#[async_std::test] -async fn audit() { - let mut feed = create_feed(50).await.unwrap(); - feed.append(b"hello").await.unwrap(); - feed.append(b"world").await.unwrap(); - match feed.audit().await { - Ok(audit_report) => { - assert_eq!(audit_report.valid_blocks, 2); - assert_eq!(audit_report.invalid_blocks, 0); - } - Err(e) => { - panic!(e); - } - } -} - -#[async_std::test] -async fn audit_bad_data() { - let mut dir = temp_dir(); - dir.push("audit_bad_data"); - let storage = Storage::new_disk(&dir, false).await.unwrap(); - let mut feed = Feed::with_storage(storage).await.unwrap(); - feed.append(b"hello").await.unwrap(); - feed.append(b"world").await.unwrap(); - let datapath = dir.join("data"); - let mut hypercore_data = fs::OpenOptions::new() - .write(true) - .open(datapath) - .expect("Unable to open the hypercore's data file!"); - hypercore_data - .write_all(b"yello") - .expect("Unable to corrupt the hypercore data file!"); - - match feed.audit().await { - Ok(audit_report) => { - assert_eq!(audit_report.valid_blocks, 1); - assert_eq!(audit_report.invalid_blocks, 1); - // Ensure that audit has cleared up the invalid block - match feed.audit().await { - Ok(audit_report) => { - 
assert_eq!( - audit_report.valid_blocks, 1, - "Audit did not clean up the invalid block!" - ); - assert_eq!( - audit_report.invalid_blocks, 0, - "Audit did not clean up the invalid block!" - ); - fs::remove_dir_all(dir) - .expect("Should be able to remove our temporary directory"); - } - Err(e) => { - fs::remove_dir_all(dir) - .expect("Should be able to remove our temporary directory"); - panic!(e); - } - } - } - Err(e) => { - fs::remove_dir_all(dir).expect("Should be able to remove our temporary directory"); - panic!(e); - } - } -} - -#[async_std::test] -async fn try_open_missing_dir() { - use rand::distributions::Alphanumeric; - use rand::{thread_rng, Rng}; - - let rand_string: String = thread_rng().sample_iter(&Alphanumeric).take(5).collect(); - let mut dir = std::env::temp_dir(); - let path = format!("hypercore_rs_test/nonexistent_paths_test/{}", rand_string); - dir.push(path); - - if Feed::open(&dir).await.is_err() { - panic!("Opening nonexistent dir at a path should succeed"); - } - - if let Ok(d) = std::fs::metadata(dir) { - if !d.is_dir() { - panic!("Opening nonexistent dir at a path must create dir"); - } - } else { - panic!("Opening nonexistent dir at a path must create dir"); - } -} - -#[async_std::test] -async fn try_open_file_as_dir() { - if Feed::open("Cargo.toml").await.is_ok() { - panic!("Opening path that points to a file must result in error"); - } -} diff --git a/tests/js/interop.js b/tests/js/interop.js new file mode 100644 index 00000000..59e9f373 --- /dev/null +++ b/tests/js/interop.js @@ -0,0 +1,128 @@ +const Hypercore = require('hypercore'); + +// Static test key pair obtained with: +// +// const crypto = require('hypercore-crypto'); +// const keyPair = crypto.keyPair(); +// console.log("public key", keyPair.publicKey.toString('hex').match(/../g).join(' ')); +// console.log("secret key", keyPair.secretKey.toString('hex').match(/../g).join(' ')); +const testKeyPair = { + publicKey: Buffer.from([ + 0x97, 0x60, 0x6c, 0xaa, 0xd2, 0xb0, 0x8c, 
0x1d, 0x5f, 0xe1, 0x64, 0x2e, 0xee, 0xa5, 0x62, 0xcb, + 0x91, 0xd6, 0x55, 0xe2, 0x00, 0xc8, 0xd4, 0x3a, 0x32, 0x09, 0x1d, 0x06, 0x4a, 0x33, 0x1e, 0xe3]), + secretKey: Buffer.from([ + 0x27, 0xe6, 0x74, 0x25, 0xc1, 0xff, 0xd1, 0xd9, 0xee, 0x62, 0x5c, 0x96, 0x2b, 0x57, 0x13, 0xc3, + 0x51, 0x0b, 0x71, 0x14, 0x15, 0xf3, 0x31, 0xf6, 0xfa, 0x9e, 0xf2, 0xbf, 0x23, 0x5f, 0x2f, 0xfe, + 0x97, 0x60, 0x6c, 0xaa, 0xd2, 0xb0, 0x8c, 0x1d, 0x5f, 0xe1, 0x64, 0x2e, 0xee, 0xa5, 0x62, 0xcb, + 0x91, 0xd6, 0x55, 0xe2, 0x00, 0xc8, 0xd4, 0x3a, 0x32, 0x09, 0x1d, 0x06, 0x4a, 0x33, 0x1e, 0xe3]), +} + +if (process.argv.length !== 4) { + console.error("Usage: node interop.js [test step] [test set]") + process.exit(1); +} + +if (process.argv[2] === '1') { + step1Create(process.argv[3]).then(result => { + console.log("step1 ready", result); + }); +} else if (process.argv[2] === '2'){ + step2AppendHelloWorld(process.argv[3]).then(result => { + console.log("step2 ready", result); + }); +} else if (process.argv[2] === '3'){ + step3ReadAndAppendUnflushed(process.argv[3]).then(result => { + console.log("step3 ready", result); + }); +} else if (process.argv[2] === '4'){ + step4AppendWithFlush(process.argv[3]).then(result => { + console.log("step4 ready", result); + }); +} else if (process.argv[2] === '5'){ + step5ClearSome(process.argv[3]).then(result => { + console.log("step5 ready", result); + }); +} else { + console.error(`Invalid test step {}`, process.argv[2]); + process.exit(2); +} + +async function step1Create(testSet) { + const core = new Hypercore(`work/${testSet}`, testKeyPair.publicKey, {keyPair: testKeyPair}); + await core.close(); +}; + +async function step2AppendHelloWorld(testSet) { + const core = new Hypercore(`work/${testSet}`, testKeyPair.publicKey, {keyPair: testKeyPair}); + const result = await core.append([Buffer.from('Hello'), Buffer.from('World')]); + assert(result.length, 2); + assert(result.byteLength, 10); + await core.close(); +}; + +async function 
step3ReadAndAppendUnflushed(testSet) { + const core = new Hypercore(`work/${testSet}`, testKeyPair.publicKey, {keyPair: testKeyPair}); + const hello = (await core.get(0)).toString(); + const world = (await core.get(1)).toString(); + assert(hello, "Hello"); + assert(world, "World"); + let result = await core.append(Buffer.from('first')); + assert(result.length, 3); + assert(result.byteLength, 15); + result = await core.append([Buffer.from('second'), Buffer.from('third')]); + assert(result.length, 5); + assert(result.byteLength, 26); + const multiBlock = Buffer.alloc(4096*3, 'a'); + result = await core.append(multiBlock); + assert(result.length, 6); + assert(result.byteLength, 12314); + result = await core.append([]); + assert(result.length, 6); + assert(result.byteLength, 12314); + const first = (await core.get(2)).toString(); + assert(first, "first"); + const second = (await core.get(3)).toString(); + assert(second, "second"); + const third = (await core.get(4)).toString(); + assert(third, "third"); + const multiBlockRead = await core.get(5); + if (!multiBlockRead.equals(multiBlock)) { + throw new Error(`Read buffers don't equal, ${multiBlockRead} but expected ${multiBlock}`); + } + await core.close(); +}; + +async function step4AppendWithFlush(testSet) { + const core = new Hypercore(`work/${testSet}`, testKeyPair.publicKey, {keyPair: testKeyPair}); + for (let i=0; i<5; i++) { + result = await core.append(Buffer.from([i])); + assert(result.length, 6+i+1); + assert(result.byteLength, 12314+i+1); + } +} + +async function step5ClearSome(testSet) { + const core = new Hypercore(`work/${testSet}`, testKeyPair.publicKey, {keyPair: testKeyPair}); + await core.clear(5); + await core.clear(7, 9); + let info = await core.info(); + assert(info.length, 11); + assert(info.byteLength, 12319); + assert(info.contiguousLength, 5); + assert(info.padding, 0); + + let missing = await core.get(5, { wait: false }); + assert(missing, null); + missing = await core.get(7, { wait: false }); 
+ assert(missing, null); + missing = await core.get(8, { wait: false }); + assert(missing, null); + const third = (await core.get(4)).toString(); + assert(third, "third"); +} + +function assert(real, expected) { + if (real != expected) { + throw new Error(`Got ${real} but expected ${expected}`); + } +} diff --git a/tests/js/mod.rs b/tests/js/mod.rs new file mode 100644 index 00000000..b0da51d4 --- /dev/null +++ b/tests/js/mod.rs @@ -0,0 +1,50 @@ +use std::fs::{create_dir_all, remove_dir_all, remove_file}; +use std::path::Path; +use std::process::Command; + +pub fn cleanup() { + if Path::new("tests/js/node_modules").exists() { + remove_dir_all("tests/js/node_modules").expect("Unable to run rm to delete node_modules"); + } + + if Path::new("tests/js/work").exists() { + remove_dir_all("tests/js/work").expect("Unable to run rm to delete work"); + } + if Path::new("tests/js/package-lock.json").exists() { + remove_file("tests/js/package-lock.json") + .expect("Unable to run rm to delete package-lock.json"); + } +} + +pub fn install() { + let status = Command::new("npm") + .current_dir("tests/js") + .args(["install"]) + .status() + .expect("Unable to run npm install"); + assert_eq!( + Some(0), + status.code(), + "npm install did not run successfully. Do you have npm installed and a network connection?" 
+ ); +} + +pub fn prepare_test_set(test_set: &str) -> String { + let path = format!("tests/js/work/{}", test_set); + create_dir_all(&path).expect("Unable to create work directory"); + path +} + +pub fn js_run_step(step: u8, test_set: &str) { + let status = Command::new("npm") + .current_dir("tests/js") + .args(["run", "step", &step.to_string(), test_set]) + .status() + .expect("Unable to run npm run"); + assert_eq!( + Some(0), + status.code(), + "node step {} did not run successfully", + step + ); +} diff --git a/tests/js/package.json b/tests/js/package.json new file mode 100644 index 00000000..2c5db7da --- /dev/null +++ b/tests/js/package.json @@ -0,0 +1,10 @@ +{ + "name": "hypercore-js-interop-tests", + "version": "0.0.1", + "scripts": { + "step": "node interop.js" + }, + "dependencies": { + "hypercore": "^10" + } +} diff --git a/tests/js_interop.rs b/tests/js_interop.rs new file mode 100644 index 00000000..5d02d737 --- /dev/null +++ b/tests/js_interop.rs @@ -0,0 +1,192 @@ +pub mod common; +pub mod js; +use std::sync::Once; + +use anyhow::Result; +use common::{create_hypercore, create_hypercore_hash, open_hypercore}; +use js::{cleanup, install, js_run_step, prepare_test_set}; +use test_log::test; + +#[cfg(feature = "async-std")] +use async_std::test as async_test; +#[cfg(feature = "tokio")] +use tokio::test as async_test; + +const TEST_SET_JS_FIRST: &str = "jsfirst"; +const TEST_SET_RS_FIRST: &str = "rsfirst"; + +static INIT: Once = Once::new(); +fn init() { + INIT.call_once(|| { + // run initialization here + cleanup(); + install(); + }); +} + +#[test(async_test)] +#[cfg_attr(not(feature = "js_interop_tests"), ignore)] +async fn js_interop_js_first() -> Result<()> { + init(); + let work_dir = prepare_test_set(TEST_SET_JS_FIRST); + assert_eq!(create_hypercore_hash(&work_dir), step_0_hash()); + js_run_step(1, TEST_SET_JS_FIRST); + assert_eq!(create_hypercore_hash(&work_dir), step_1_hash()); + step_2_append_hello_world(&work_dir).await?; + 
assert_eq!(create_hypercore_hash(&work_dir), step_2_hash()); + js_run_step(3, TEST_SET_JS_FIRST); + assert_eq!(create_hypercore_hash(&work_dir), step_3_hash()); + step_4_append_with_flush(&work_dir).await?; + assert_eq!(create_hypercore_hash(&work_dir), step_4_hash()); + js_run_step(5, TEST_SET_JS_FIRST); + assert_eq!(create_hypercore_hash(&work_dir), step_5_hash()); + Ok(()) +} + +#[test(async_test)] +#[cfg_attr(not(feature = "js_interop_tests"), ignore)] +async fn js_interop_rs_first() -> Result<()> { + init(); + let work_dir = prepare_test_set(TEST_SET_RS_FIRST); + assert_eq!(create_hypercore_hash(&work_dir), step_0_hash()); + step_1_create(&work_dir).await?; + assert_eq!(create_hypercore_hash(&work_dir), step_1_hash()); + js_run_step(2, TEST_SET_RS_FIRST); + assert_eq!(create_hypercore_hash(&work_dir), step_2_hash()); + step_3_read_and_append_unflushed(&work_dir).await?; + assert_eq!(create_hypercore_hash(&work_dir), step_3_hash()); + js_run_step(4, TEST_SET_RS_FIRST); + assert_eq!(create_hypercore_hash(&work_dir), step_4_hash()); + step_5_clear_some(&work_dir).await?; + assert_eq!(create_hypercore_hash(&work_dir), step_5_hash()); + Ok(()) +} + +async fn step_1_create(work_dir: &str) -> Result<()> { + create_hypercore(work_dir).await?; + Ok(()) +} + +async fn step_2_append_hello_world(work_dir: &str) -> Result<()> { + let mut hypercore = open_hypercore(work_dir).await?; + let batch: &[&[u8]] = &[b"Hello", b"World"]; + let append_outcome = hypercore.append_batch(batch).await?; + assert_eq!(append_outcome.length, 2); + assert_eq!(append_outcome.byte_length, 10); + Ok(()) +} + +async fn step_3_read_and_append_unflushed(work_dir: &str) -> Result<()> { + let mut hypercore = open_hypercore(work_dir).await?; + let hello = hypercore.get(0).await?; + assert_eq!(hello.unwrap(), b"Hello"); + let world = hypercore.get(1).await?; + assert_eq!(world.unwrap(), b"World"); + let append_outcome = hypercore.append(b"first").await?; + assert_eq!(append_outcome.length, 3); + 
assert_eq!(append_outcome.byte_length, 15); + let batch: &[&[u8]] = &[b"second", b"third"]; + let append_outcome = hypercore.append_batch(batch).await?; + assert_eq!(append_outcome.length, 5); + assert_eq!(append_outcome.byte_length, 26); + let multi_block = &[0x61_u8; 4096 * 3]; + let append_outcome = hypercore.append(multi_block).await?; + assert_eq!(append_outcome.length, 6); + assert_eq!(append_outcome.byte_length, 12314); + let batch: Vec> = vec![]; + let append_outcome = hypercore.append_batch(&batch).await?; + assert_eq!(append_outcome.length, 6); + assert_eq!(append_outcome.byte_length, 12314); + let first = hypercore.get(2).await?; + assert_eq!(first.unwrap(), b"first"); + let second = hypercore.get(3).await?; + assert_eq!(second.unwrap(), b"second"); + let third = hypercore.get(4).await?; + assert_eq!(third.unwrap(), b"third"); + let multi_block_read = hypercore.get(5).await?; + assert_eq!(multi_block_read.unwrap(), multi_block); + Ok(()) +} + +async fn step_4_append_with_flush(work_dir: &str) -> Result<()> { + let mut hypercore = open_hypercore(work_dir).await?; + for i in 0..5 { + let append_outcome = hypercore.append(&[i]).await?; + assert_eq!(append_outcome.length, (6 + i + 1) as u64); + assert_eq!(append_outcome.byte_length, (12314 + i as u64 + 1)); + } + Ok(()) +} + +async fn step_5_clear_some(work_dir: &str) -> Result<()> { + let mut hypercore = open_hypercore(work_dir).await?; + hypercore.clear(5, 6).await?; + hypercore.clear(7, 9).await?; + let info = hypercore.info(); + assert_eq!(info.length, 11); + assert_eq!(info.byte_length, 12319); + assert_eq!(info.contiguous_length, 5); + let missing = hypercore.get(5).await?; + assert_eq!(missing, None); + let missing = hypercore.get(7).await?; + assert_eq!(missing, None); + let missing = hypercore.get(8).await?; + assert_eq!(missing, None); + let third = hypercore.get(4).await?; + assert_eq!(third.unwrap(), b"third"); + Ok(()) +} + +fn step_0_hash() -> common::HypercoreHash { + common::HypercoreHash { + 
bitfield: None, + data: None, + oplog: None, + tree: None, + } +} + +fn step_1_hash() -> common::HypercoreHash { + common::HypercoreHash { + bitfield: None, + data: None, + oplog: Some("A30BD5326139E8650F3D53CB43291945AE92796ABAEBE1365AC1B0C37D008936".into()), + tree: None, + } +} + +fn step_2_hash() -> common::HypercoreHash { + common::HypercoreHash { + bitfield: Some("0E2E1FF956A39192CBB68D2212288FE75B32733AB0C442B9F0471E254A0382A2".into()), + data: Some("872E4E50CE9990D8B041330C47C9DDD11BEC6B503AE9386A99DA8584E9BB12C4".into()), + oplog: Some("C65A6867991D29FCF98B4E4549C1039CB5B3C63D891BA1EA4F0BB47211BA4B05".into()), + tree: Some("8577B24ADC763F65D562CD11204F938229AD47F27915B0821C46A0470B80813A".into()), + } +} + +fn step_3_hash() -> common::HypercoreHash { + common::HypercoreHash { + bitfield: Some("DEC1593A7456C8C9407B9B8B9C89682DFFF33C3892BCC9D9F06956FEE0A1B949".into()), + data: Some("99EB5BC150A1102A7E50D15F90594660010B7FE719D54129065D1D417AA5015A".into()), + oplog: Some("5DCE3C7C86B0E129B32E5A07CA3DF668006A42F9D75399D6E4DB3F18256B8468".into()), + tree: Some("38788609A8634DC8D34F9AE723F3169ADB20768ACFDFF266A43B7E217750DD1E".into()), + } +} + +fn step_4_hash() -> common::HypercoreHash { + common::HypercoreHash { + bitfield: Some("9B844E9378A7D13D6CDD4C1FF12FB313013E5CC472C6CB46497033563FE6B8F1".into()), + data: Some("AF3AC31CFBE1733C62496CF8E856D5F1EFB4B06CBF1E74204221C89E2F3E1CDE".into()), + oplog: Some("46E01E9CECDF6E7EA85807F65C5F3CEED96583F3BF97BC6835A6DA05E39FE8E9".into()), + tree: Some("26339A21D606A1F731B90E8001030651D48378116B06A9C1EF87E2538194C2C6".into()), + } +} + +fn step_5_hash() -> common::HypercoreHash { + common::HypercoreHash { + bitfield: Some("40C9CED82AE0B7A397C9FDD14EEB7F70B74E8F1229F3ED931852591972DDC3E0".into()), + data: Some("D9FFCCEEE9109751F034ECDAE328672956B90A6E0B409C3173741B8A5D0E75AB".into()), + oplog: Some("803384F10871FB60E53A7F833E6E1E9729C6D040D960164077963092BBEBA274".into()), + tree: 
Some("26339A21D606A1F731B90E8001030651D48378116B06A9C1EF87E2538194C2C6".into()), + } +} diff --git a/tests/model.rs b/tests/model.rs index 967f58ba..e6a52fed 100644 --- a/tests/model.rs +++ b/tests/model.rs @@ -1,80 +1,127 @@ -mod common; +pub mod common; -use common::create_feed; -use quickcheck::{quickcheck, Arbitrary, Gen}; -use rand::seq::SliceRandom; -use rand::Rng; -use std::u8; +use proptest::prelude::*; +use proptest::test_runner::FileFailurePersistence; +use proptest_derive::Arbitrary; -const MAX_FILE_SIZE: u64 = 5 * 10; // 5mb +const MAX_FILE_SIZE: u64 = 50000; -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Arbitrary)] enum Op { - Get { index: u64 }, - Append { data: Vec }, - Verify, + Get { + #[proptest(strategy(index_strategy))] + index: u64, + }, + Append { + #[proptest(regex(data_regex))] + data: Vec, + }, + Clear { + #[proptest(strategy(divisor_strategy))] + len_divisor_for_start: u8, + #[proptest(strategy(divisor_strategy))] + len_divisor_for_length: u8, + }, } -impl Arbitrary for Op { - fn arbitrary(g: &mut G) -> Self { - let choices = [0, 1, 2]; - match choices.choose(g).expect("Value should exist") { - 0 => { - let index: u64 = g.gen_range(0, MAX_FILE_SIZE); - Op::Get { index } - } - 1 => { - let length: u64 = g.gen_range(0, MAX_FILE_SIZE / 3); - let mut data = Vec::with_capacity(length as usize); - for _ in 0..length { - data.push(u8::arbitrary(g)); - } - Op::Append { data } - } - 2 => Op::Verify, - err => panic!("Invalid choice {}", err), - } - } +fn index_strategy() -> impl Strategy { + 0..MAX_FILE_SIZE } -quickcheck! { - fn implementation_matches_model(ops: Vec) -> bool { - async_std::task::block_on(async { - let page_size = 50; +fn divisor_strategy() -> impl Strategy { + 1_u8..17_u8 +} + +fn data_regex() -> &'static str { + // Write 0..5000 byte chunks of ASCII characters as dummy data + "([ -~]{1,1}\n){0,5000}" +} + +proptest! 
{ + #![proptest_config(ProptestConfig { + failure_persistence: Some(Box::new(FileFailurePersistence::WithSource("regressions"))), + ..Default::default() + })] + + #[test] + #[cfg(feature = "async-std")] + fn implementation_matches_model(ops: Vec) { + assert!(async_std::task::block_on(assert_implementation_matches_model(ops))); + } - let mut insta = create_feed(page_size) + #[test] + #[cfg(feature = "tokio")] + fn implementation_matches_model(ops: Vec) { + let rt = tokio::runtime::Runtime::new().unwrap(); + assert!(rt.block_on(async { + assert_implementation_matches_model(ops).await + })); + } +} + +async fn assert_implementation_matches_model(ops: Vec) -> bool { + use hypercore::{HypercoreBuilder, Storage}; + + let storage = Storage::new_memory() + .await + .expect("Memory storage creation should be successful"); + let mut hypercore = HypercoreBuilder::new(storage) + .build() .await - .expect("Instance creation should be successful"); - let mut model = vec![]; + .expect("Hypercore creation should be successful"); - for op in ops { + let mut model: Vec>> = vec![]; + + for op in ops { match op { - Op::Append { data } => { - insta.append(&data).await.expect("Append should be successful"); - model.push(data); - }, - Op::Get { index } => { - let data = insta.get(index).await.expect("Get should be successful"); - if index >= insta.len() { - assert_eq!(data, None); - } else { - assert_eq!(data, Some(model[index as usize].clone())); + Op::Append { data } => { + hypercore + .append(&data) + .await + .expect("Append should be successful"); + model.push(Some(data)); + } + Op::Get { index } => { + let data = hypercore + .get(index) + .await + .expect("Get should be successful"); + if index >= hypercore.info().length { + assert_eq!(data, None); + } else { + assert_eq!(data, model[index as usize].clone()); + } } - }, - Op::Verify => { - let len = insta.len(); - if len == 0 { - insta.signature(len).await.unwrap_err(); - } else { - // Always test index of last entry, which is `len 
- 1`. - let len = len - 1; - let sig = insta.signature(len).await.expect("Signature should exist"); - insta.verify(len, &sig).await.expect("Signature should match"); + Op::Clear { + len_divisor_for_start, + len_divisor_for_length, + } => { + let start = { + let result = model.len() as u64 / len_divisor_for_start as u64; + if result == model.len() as u64 { + if !model.is_empty() { + result - 1 + } else { + 0 + } + } else { + result + } + }; + let length = model.len() as u64 / len_divisor_for_length as u64; + let end = start + length; + let model_end = if end < model.len() as u64 { + end + } else { + model.len() as u64 + }; + hypercore + .clear(start, end) + .await + .expect("Clear should be successful"); + model[start as usize..model_end as usize].fill(None); } - }, } - } - true - }) - } + } + true } diff --git a/tests/regression.rs b/tests/regression.rs deleted file mode 100644 index 8b6a00e6..00000000 --- a/tests/regression.rs +++ /dev/null @@ -1,18 +0,0 @@ -mod common; - -use common::create_feed; - -// Postmortem: errors were happening correctly, but the error check in -// `.signature()` was off. Instead of checking for a range (`<`), we were -// checking inclusively `<=`. All we had to do was fix the check, and we all -// good. 
-#[async_std::test] -async fn regression_01() { - let mut feed = create_feed(50).await.unwrap(); - assert_eq!(feed.len(), 0); - feed.signature(0).await.unwrap_err(); - - let data = b"some_data"; - feed.append(data).await.unwrap(); - feed.signature(0).await.unwrap(); -} diff --git a/tests/storage.rs b/tests/storage.rs deleted file mode 100644 index d540b310..00000000 --- a/tests/storage.rs +++ /dev/null @@ -1,51 +0,0 @@ -use ed25519_dalek::PublicKey; -use hypercore::{generate_keypair, sign, verify, Signature, Storage}; - -#[async_std::test] -async fn should_write_and_read_keypair() { - let keypair = generate_keypair(); - let msg = b"hello"; - // prepare a signature - let sig: Signature = sign(&keypair.public, &keypair.secret, msg); - - let mut storage = Storage::new_memory().await.unwrap(); - assert!( - storage.write_secret_key(&keypair.secret).await.is_ok(), - "Can not store secret key." - ); - assert!( - storage.write_public_key(&keypair.public).await.is_ok(), - "Can not store public key." - ); - - let read = storage.read_public_key().await; - assert!(read.is_ok(), "Can not read public key"); - let public_key: PublicKey = read.unwrap(); - assert!(verify(&public_key, msg, Some(&sig)).is_ok()); -} - -#[async_std::test] -async fn should_read_partial_keypair() { - let keypair = generate_keypair(); - let mut storage = Storage::new_memory().await.unwrap(); - assert!( - storage.write_public_key(&keypair.public).await.is_ok(), - "Can not store public key." 
- ); - - let partial = storage.read_partial_keypair().await.unwrap(); - assert!(partial.secret.is_none(), "A secret key is present"); -} - -#[async_std::test] -async fn should_read_no_keypair() { - let mut storage = Storage::new_memory().await.unwrap(); - let partial = storage.read_partial_keypair().await; - assert!(partial.is_none(), "A key is present"); -} - -#[async_std::test] -async fn should_read_empty_public_key() { - let mut storage = Storage::new_memory().await.unwrap(); - assert!(storage.read_public_key().await.is_err()); -}