From 8e8378b980ac4d25421e2568c48bcb3e3c88f4fe Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Fri, 10 Jan 2025 22:08:44 +0100 Subject: [PATCH 01/23] chore: setup dat test scaffolding Signed-off-by: Robert Pack --- .github/actions/load_dat/action.yaml | 26 ++++ .github/actions/setup-env/action.yml | 4 +- .gitignore | 4 +- Cargo.toml | 9 +- crates/core/Cargo.toml | 7 +- crates/core/src/kernel/snapshot/mod.rs | 1 + crates/core/src/kernel/snapshot/next.rs | 183 ++++++++++++++++++++++++ crates/test/Cargo.toml | 13 +- crates/test/src/acceptance/data.rs | 130 +++++++++++++++++ crates/test/src/acceptance/meta.rs | 73 ++++++++++ crates/test/src/acceptance/mod.rs | 5 + crates/test/src/lib.rs | 1 + 12 files changed, 444 insertions(+), 12 deletions(-) create mode 100644 .github/actions/load_dat/action.yaml create mode 100644 crates/core/src/kernel/snapshot/next.rs create mode 100644 crates/test/src/acceptance/data.rs create mode 100644 crates/test/src/acceptance/meta.rs create mode 100644 crates/test/src/acceptance/mod.rs diff --git a/.github/actions/load_dat/action.yaml b/.github/actions/load_dat/action.yaml new file mode 100644 index 0000000000..071db58ba0 --- /dev/null +++ b/.github/actions/load_dat/action.yaml @@ -0,0 +1,26 @@ +name: Delta Acceptance Tests +description: Load Delta Lake acceptance test data + +inputs: + version: + description: "The Python version to set up" + required: false + default: "0.0.3" + + target-directory: + description: target directory for acceptance test data + required: false + default: ${{ github.workspace }}/dat + +runs: + using: composite + + steps: + - name: load DAT + shell: bash + run: | + rm -rf {{ inputs.target-directory }} + curl -OL https://github.com/delta-incubator/dat/releases/download/v${{ inputs.version }}/deltalake-dat-v${{ inputs.version }}.tar.gz + mkdir -p {{ inputs.target-directory }} + tar --no-same-permissions -xzf deltalake-dat-v${{ inputs.version }}.tar.gz --directory {{ inputs.target-directory }} + rm deltalake-dat-v${{ inputs.version }}.tar.gz diff --git a/.github/actions/setup-env/action.yml b/.github/actions/setup-env/action.yml index 8339c45449..74f05ea84d 100644 --- a/.github/actions/setup-env/action.yml +++ b/.github/actions/setup-env/action.yml @@ -4,12 +4,12 @@ description: "Set up Python, virtual environment, and Rust toolchain" inputs: python-version: description: "The Python version to set up" - required: true + required: false default: "3.10" rust-toolchain: description: "The Rust toolchain to set up" - required: true + required: false default: "stable" runs: diff --git a/.gitignore b/.gitignore index 18dcc39f69..ee7ca99235 100644 --- a/.gitignore +++ b/.gitignore @@ -23,6 +23,7 @@ __blobstorage__ .githubchangeloggenerator.cache/ .githubchangeloggenerator* data +.zed/ # Add all Cargo.lock files except for those in binary crates Cargo.lock @@ -32,4 +33,5 @@ Cargo.lock justfile site -__pycache__ \ No newline at end of file +__pycache__ +dat/ diff --git a/Cargo.toml b/Cargo.toml index c500941247..8ac14e5209 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,8 +26,12 @@ debug = true debug = "line-tables-only" [workspace.dependencies] -delta_kernel = { version = "=0.6.0", features = ["default-engine"] } +#delta_kernel = { version = "=0.6.0", features = ["default-engine"] } #delta_kernel = { path = "../delta-kernel-rs/kernel", features = ["sync-engine"] } +delta_kernel = { git = "https://github.com/roeap/delta-kernel-rs", rev = "fcc43b50dafdc5e6b84c206492bbde8ed1115529", features = [ + "default-engine", + "developer-visibility", +] } # arrow arrow = { version = "53" } @@ -59,7 +63,7 @@ datafusion-sql = { version = "44" } # serde serde = { version = "1.0.194", features = ["derive"] } serde_json = "1" -strum = { version = "*"} +strum = { version = "*" } # "stdlib" @@ -77,4 +81,3 @@ async-trait = { version = "0.1" } futures = { version = "0.3" } tokio = { version = "1" } num_cpus = { version = "1" } - diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index 65743fe281..d7143983a7 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -15,7 +15,7 @@ rust-version.workspace = true features = ["datafusion", "json", "unity-experimental"] [dependencies] -delta_kernel.workspace = true +delta_kernel = { workspace = true } # arrow arrow = { workspace = true } @@ -29,10 +29,7 @@ arrow-ord = { workspace = true } arrow-row = { workspace = true } arrow-schema = { workspace = true, features = ["serde"] } arrow-select = { workspace = true } -parquet = { workspace = true, features = [ - "async", - "object_store", -] } +parquet = { workspace = true, features = ["async", "object_store"] } pin-project-lite = "^0.2.7" # datafusion diff --git a/crates/core/src/kernel/snapshot/mod.rs b/crates/core/src/kernel/snapshot/mod.rs index 2938b3d3db..38d1dc570d 100644 --- a/crates/core/src/kernel/snapshot/mod.rs +++ b/crates/core/src/kernel/snapshot/mod.rs @@ -45,6 +45,7 @@ pub use self::log_data::*; mod log_data; pub(crate) mod log_segment; +mod next; pub(crate) mod parse; mod replay; mod serde; diff --git a/crates/core/src/kernel/snapshot/next.rs b/crates/core/src/kernel/snapshot/next.rs new file mode 100644 index 0000000000..7189fc75cd --- /dev/null +++ b/crates/core/src/kernel/snapshot/next.rs @@ -0,0 +1,183 @@ +use std::collections::HashMap; +use std::sync::{Arc, LazyLock}; + +use ::serde::{Deserialize, Serialize}; +use arrow_array::RecordBatch; +use delta_kernel::actions::{ + get_log_schema, ADD_NAME, CDC_NAME, METADATA_NAME, PROTOCOL_NAME, REMOVE_NAME, + SET_TRANSACTION_NAME, +}; +use delta_kernel::actions::{Metadata, Protocol}; +use delta_kernel::engine::default::executor::tokio::{ + TokioBackgroundExecutor, TokioMultiThreadExecutor, +}; +use delta_kernel::engine::default::DefaultEngine; +use delta_kernel::engine_data::{GetData, RowVisitor, TypedGetData as _}; +use delta_kernel::expressions::ColumnName; +use delta_kernel::scan::state::{DvInfo, Stats}; +use delta_kernel::scan::ScanBuilder; +use delta_kernel::schema::{ColumnNamesAndTypes, DataType}; +use delta_kernel::snapshot::Snapshot as SnapshotInner; +use delta_kernel::{DeltaResult as KernelResult, Engine, Error, Table}; +use futures::{StreamExt, TryStreamExt}; +use object_store::path::Path; +use object_store::ObjectStore; +use tokio::sync::mpsc::channel; + +use crate::{DeltaResult, DeltaTableConfig, DeltaTableError}; + +pub struct Snapshot { + inner: Arc, + engine: Arc, +} + +impl Snapshot { + pub fn new(inner: Arc, engine: Arc) -> Self { + Self { inner, engine } + } + + pub async fn try_new( + table: Table, + store: Arc, + config: DeltaTableConfig, + version: Option, + ) -> DeltaResult { + let executor = Arc::new(TokioMultiThreadExecutor::new( + tokio::runtime::Handle::current(), + )); + let table_root = Path::from_url_path(table.location().path())?; + let engine = DefaultEngine::new(store, table_root, executor); + let snapshot = table.snapshot(&engine, None)?; + + Ok(Self::new(Arc::new(snapshot), Arc::new(engine))) + } + + pub fn protocol(&self) -> &Protocol { + self.inner.protocol() + } + + pub fn metadata(&self) -> &delta_kernel::actions::Metadata { + self.inner.metadata() + } + + pub(crate) fn replay_log(&self) -> DeltaResult<()> { + let log_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME])?; + let actions = self.inner._log_segment().replay( + self.engine.as_ref(), + log_schema.clone(), + log_schema.clone(), + None, + )?; + + // let it = scan_action_iter( + // engine, + // self.replay_for_scan_data(engine)?, + // physical_predicate, + // ); + // Ok(Some(it).into_iter().flatten()) + + Ok(()) + } +} + +enum Action { + Metadata(delta_kernel::actions::Metadata), + Protocol(delta_kernel::actions::Protocol), + Remove(delta_kernel::actions::Remove), + Add(delta_kernel::actions::Add), + SetTransaction(delta_kernel::actions::SetTransaction), + Cdc(delta_kernel::actions::Cdc), +} + +static NAMES_AND_TYPES: LazyLock = + LazyLock::new(|| get_log_schema().leaves(None)); + +struct LogVisitor { + actions: Vec<(Action, usize)>, + offsets: HashMap, + previous_rows_seen: usize, +} + +impl LogVisitor { + fn new() -> LogVisitor { + // Grab the start offset for each top-level column name, then compute the end offset by + // skipping the rest of the leaves for that column. + let mut offsets = HashMap::new(); + let mut it = NAMES_AND_TYPES.as_ref().0.iter().enumerate().peekable(); + while let Some((start, col)) = it.next() { + let mut end = start + 1; + while it.next_if(|(_, other)| col[0] == other[0]).is_some() { + end += 1; + } + offsets.insert(col[0].clone(), (start, end)); + } + LogVisitor { + actions: vec![], + offsets, + previous_rows_seen: 0, + } + } +} + +impl RowVisitor for LogVisitor { + fn selected_column_names_and_types(&self) -> (&'static [ColumnName], &'static [DataType]) { + todo!() + } + + fn visit<'a>(&mut self, row_count: usize, getters: &[&'a dyn GetData<'a>]) -> KernelResult<()> { + todo!() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use deltalake_test::acceptance::{read_dat_case, TestCaseInfo}; + use deltalake_test::TestResult; + use std::path::PathBuf; + + fn get_dat_dir() -> PathBuf { + let d = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + let mut rep_root = d + .parent() + .and_then(|p| p.parent()) + .expect("valid directory") + .to_path_buf(); + rep_root.push("dat/out/reader_tests/generated"); + rep_root + } + + #[tokio::test(flavor = "multi_thread")] + async fn load_snapshot() -> TestResult<()> { + // some comment + let mut dat_dir = get_dat_dir(); + dat_dir.push("basic_append"); + let dat_info: TestCaseInfo = read_dat_case(dat_dir)?; + let table_info = dat_info.table_summary()?; + + let table = Table::try_from_uri(dat_info.table_root()?)?; + + let snapshot = Snapshot::try_new( + table, + Arc::new(object_store::local::LocalFileSystem::default()), + Default::default(), + None, + ) + .await?; + + assert_eq!( + snapshot.protocol().min_reader_version(), + table_info.min_reader_version + ); + + // let table_root = object_store::path::Path::new("s3://delta-rs/test"); + // let store = object_store::ObjectStore::new(&table_root).unwrap(); + // let table = delta::DeltaTable::load(&store, &table_root).await.unwrap(); + // let snapshot = delta::Snapshot::try_new(table_root, table, store, Default::default(), None) + // .await + // .unwrap(); + // snapshot.replay_log().unwrap(); + Ok(()) + } +} diff --git a/crates/test/Cargo.toml b/crates/test/Cargo.toml index 6c93fa705c..1dfaccdc07 100644 --- a/crates/test/Cargo.toml +++ b/crates/test/Cargo.toml @@ -5,9 +5,18 @@ edition = "2021" publish = false [dependencies] +delta_kernel = { workspace = true } +deltalake-core = { version = "0.24.0", path = "../core" } + +arrow-array = { workspace = true, features = ["chrono-tz"] } +arrow-cast = { workspace = true } +arrow-ord = { workspace = true } +arrow-schema = { workspace = true, features = ["serde"] } +arrow-select = { workspace = true } +parquet = { workspace = true, features = ["async", "object_store"] } + bytes = { workspace = true } chrono = { workspace = true, default-features = false, features = ["clock"] } -deltalake-core = { version = "0.24.0", path = "../core" } dotenvy = "0" fs_extra = "1.3.0" futures = { version = "0.3" } @@ -16,7 +25,9 @@ rand = "0.8" serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } tempfile = "3" +thiserror = { workspace = true } tokio = { version = "1", features = ["macros", "rt-multi-thread"] } +url = { workspace = true } [features] default = [] diff --git a/crates/test/src/acceptance/data.rs b/crates/test/src/acceptance/data.rs new file mode 100644 index 0000000000..6d8ae4dbca --- /dev/null +++ b/crates/test/src/acceptance/data.rs @@ -0,0 +1,130 @@ +use std::{path::Path, sync::Arc}; + +use arrow_array::{Array, RecordBatch}; +use arrow_ord::sort::{lexsort_to_indices, SortColumn}; +use arrow_schema::{DataType, Schema}; +use arrow_select::{concat::concat_batches, take::take}; +use delta_kernel::DeltaResult; +use futures::{stream::TryStreamExt, StreamExt}; +use object_store::{local::LocalFileSystem, ObjectStore}; +use parquet::arrow::async_reader::{ParquetObjectReader, ParquetRecordBatchStreamBuilder}; + +use super::TestCaseInfo; +use crate::TestResult; + +pub async fn read_golden(path: &Path, _version: Option<&str>) -> DeltaResult { + let expected_root = path.join("expected").join("latest").join("table_content"); + let store = Arc::new(LocalFileSystem::new_with_prefix(&expected_root)?); + let files: Vec<_> = store.list(None).try_collect().await?; + let mut batches = vec![]; + let mut schema = None; + for meta in files.into_iter() { + if let Some(ext) = meta.location.extension() { + if ext == "parquet" { + let reader = ParquetObjectReader::new(store.clone(), meta); + let builder = ParquetRecordBatchStreamBuilder::new(reader).await?; + if schema.is_none() { + schema = Some(builder.schema().clone()); + } + let mut stream = builder.build()?; + while let Some(batch) = stream.next().await { + batches.push(batch?); + } + } + } + } + let all_data = concat_batches(&schema.unwrap(), &batches)?; + Ok(all_data) +} + +pub fn sort_record_batch(batch: RecordBatch) -> DeltaResult { + // Sort by all columns + let mut sort_columns = vec![]; + for col in batch.columns() { + match col.data_type() { + DataType::Struct(_) | DataType::List(_) | DataType::Map(_, _) => { + // can't sort structs, lists, or maps + } + _ => sort_columns.push(SortColumn { + values: col.clone(), + options: None, + }), + } + } + let indices = lexsort_to_indices(&sort_columns, None)?; + let columns = batch + .columns() + .iter() + .map(|c| take(c, &indices, None).unwrap()) + .collect(); + Ok(RecordBatch::try_new(batch.schema(), columns)?) +} + +// Ensure that two schema have the same field names, and dict_id/ordering. +// We ignore: +// - data type: This is checked already in `assert_columns_match` +// - nullability: parquet marks many things as nullable that we don't in our schema +// - metadata: because that diverges from the real data to the golden tabled data +fn assert_schema_fields_match(schema: &Schema, golden: &Schema) { + for (schema_field, golden_field) in schema.fields.iter().zip(golden.fields.iter()) { + assert!( + schema_field.name() == golden_field.name(), + "Field names don't match" + ); + assert!( + schema_field.dict_id() == golden_field.dict_id(), + "Field dict_id doesn't match" + ); + assert!( + schema_field.dict_is_ordered() == golden_field.dict_is_ordered(), + "Field dict_is_ordered doesn't match" + ); + } +} + +// some things are equivalent, but don't show up as equivalent for `==`, so we normalize here +fn normalize_col(col: Arc) -> Arc { + if let DataType::Timestamp(unit, Some(zone)) = col.data_type() { + if **zone == *"+00:00" { + arrow_cast::cast::cast(&col, &DataType::Timestamp(*unit, Some("UTC".into()))) + .expect("Could not cast to UTC") + } else { + col + } + } else { + col + } +} + +fn assert_columns_match(actual: &[Arc], expected: &[Arc]) { + for (actual, expected) in actual.iter().zip(expected) { + let actual = normalize_col(actual.clone()); + let expected = normalize_col(expected.clone()); + // note that array equality includes data_type equality + // See: https://arrow.apache.org/rust/arrow_data/equal/fn.equal.html + assert_eq!( + &actual, &expected, + "Column data didn't match. Got {actual:?}, expected {expected:?}" + ); + } +} + +pub async fn assert_scan_data( + all_data: Vec, + test_case: &TestCaseInfo, +) -> TestResult<()> { + let all_data = concat_batches(&all_data[0].schema(), all_data.iter()).unwrap(); + let all_data = sort_record_batch(all_data)?; + + let golden = read_golden(test_case.root_dir(), None).await?; + let golden = sort_record_batch(golden)?; + + assert_columns_match(all_data.columns(), golden.columns()); + assert_schema_fields_match(all_data.schema().as_ref(), golden.schema().as_ref()); + assert!( + all_data.num_rows() == golden.num_rows(), + "Didn't have same number of rows" + ); + + Ok(()) +} diff --git a/crates/test/src/acceptance/meta.rs b/crates/test/src/acceptance/meta.rs new file mode 100644 index 0000000000..6a44f2cb69 --- /dev/null +++ b/crates/test/src/acceptance/meta.rs @@ -0,0 +1,73 @@ +use std::collections::HashMap; +use std::fs::File; +use std::path::{Path, PathBuf}; + +use delta_kernel::{Error, Version}; +use serde::{Deserialize, Serialize}; +use url::Url; + +#[derive(Debug, thiserror::Error)] +pub enum AssertionError { + #[error("Invalid test case data")] + InvalidTestCase, + + #[error("Kernel error: {0}")] + KernelError(#[from] Error), +} + +pub type TestResult = std::result::Result; + +#[derive(Serialize, Deserialize, PartialEq, Eq, Debug)] +struct TestCaseInfoJson { + name: String, + description: String, +} + +#[derive(PartialEq, Eq, Debug)] +pub struct TestCaseInfo { + name: String, + description: String, + root_dir: PathBuf, +} + +impl TestCaseInfo { + /// Root path for this test cases Delta table. + pub fn table_root(&self) -> TestResult { + let table_root = self.root_dir.join("delta"); + Url::from_directory_path(table_root).map_err(|_| AssertionError::InvalidTestCase) + } + + pub fn root_dir(&self) -> &PathBuf { + &self.root_dir + } + + pub fn table_summary(&self) -> TestResult { + let info_path = self + .root_dir() + .join("expected/latest/table_version_metadata.json"); + let file = File::open(info_path).map_err(|_| AssertionError::InvalidTestCase)?; + let info: TableVersionMetaData = + serde_json::from_reader(file).map_err(|_| AssertionError::InvalidTestCase)?; + Ok(info) + } +} + +#[derive(Serialize, Deserialize, PartialEq, Eq, Debug)] +pub struct TableVersionMetaData { + pub version: Version, + pub properties: HashMap, + pub min_reader_version: i32, + pub min_writer_version: i32, +} + +pub fn read_dat_case(case_root: impl AsRef) -> TestResult { + let info_path = case_root.as_ref().join("test_case_info.json"); + let file = File::open(info_path).map_err(|_| AssertionError::InvalidTestCase)?; + let info: TestCaseInfoJson = + serde_json::from_reader(file).map_err(|_| AssertionError::InvalidTestCase)?; + Ok(TestCaseInfo { + root_dir: case_root.as_ref().into(), + name: info.name, + description: info.description, + }) +} diff --git a/crates/test/src/acceptance/mod.rs b/crates/test/src/acceptance/mod.rs new file mode 100644 index 0000000000..521fd294ae --- /dev/null +++ b/crates/test/src/acceptance/mod.rs @@ -0,0 +1,5 @@ +pub mod data; +pub mod meta; + +pub use data::*; +pub use meta::*; diff --git a/crates/test/src/lib.rs b/crates/test/src/lib.rs index dd8c2a2951..6220d7ae1d 100644 --- a/crates/test/src/lib.rs +++ b/crates/test/src/lib.rs @@ -14,6 +14,7 @@ use deltalake_core::DeltaTableBuilder; use deltalake_core::{ObjectStore, Path}; use tempfile::TempDir; +pub mod acceptance; pub mod clock; pub mod concurrent; #[cfg(feature = "datafusion")] From 333198c407e848fe0a8fcd871eb23ae8b2e36ff7 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Sat, 11 Jan 2025 15:19:09 +0100 Subject: [PATCH 02/23] feat: file action replay Signed-off-by: Robert Pack --- Cargo.toml | 7 +- crates/core/src/kernel/snapshot/next.rs | 337 +++++++++++++++++++----- crates/core/src/protocol/checkpoints.rs | 9 +- 3 files changed, 279 insertions(+), 74 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8ac14e5209..c1bc6ea502 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,11 +27,14 @@ debug = "line-tables-only" [workspace.dependencies] #delta_kernel = { version = "=0.6.0", features = ["default-engine"] } -#delta_kernel = { path = "../delta-kernel-rs/kernel", features = ["sync-engine"] } -delta_kernel = { git = "https://github.com/roeap/delta-kernel-rs", rev = "fcc43b50dafdc5e6b84c206492bbde8ed1115529", features = [ +delta_kernel = { path = "../delta-kernel-rs/kernel", features = [ "default-engine", "developer-visibility", ] } +# delta_kernel = { git = "https://github.com/roeap/delta-kernel-rs", rev = "fcc43b50dafdc5e6b84c206492bbde8ed1115529", features = [ +# "default-engine", +# "developer-visibility", +# ] } # arrow arrow = { version = "53" } diff --git a/crates/core/src/kernel/snapshot/next.rs b/crates/core/src/kernel/snapshot/next.rs index 7189fc75cd..3e7ab01fe2 100644 --- a/crates/core/src/kernel/snapshot/next.rs +++ b/crates/core/src/kernel/snapshot/next.rs @@ -1,31 +1,75 @@ -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::sync::{Arc, LazyLock}; use ::serde::{Deserialize, Serialize}; -use arrow_array::RecordBatch; +use arrow::compute::{concat_batches, filter_record_batch}; +use arrow_arith::boolean::{and, is_null, not}; +use arrow_array::{BooleanArray, RecordBatch}; use delta_kernel::actions::{ - get_log_schema, ADD_NAME, CDC_NAME, METADATA_NAME, PROTOCOL_NAME, REMOVE_NAME, - SET_TRANSACTION_NAME, + get_log_add_schema, get_log_schema, Add, ADD_NAME, CDC_NAME, METADATA_NAME, PROTOCOL_NAME, + REMOVE_NAME, SET_TRANSACTION_NAME, }; use delta_kernel::actions::{Metadata, Protocol}; +use delta_kernel::engine::arrow_data::ArrowEngineData; use delta_kernel::engine::default::executor::tokio::{ TokioBackgroundExecutor, TokioMultiThreadExecutor, }; use delta_kernel::engine::default::DefaultEngine; use delta_kernel::engine_data::{GetData, RowVisitor, TypedGetData as _}; use delta_kernel::expressions::ColumnName; +use delta_kernel::scan::log_replay::scan_action_iter; use delta_kernel::scan::state::{DvInfo, Stats}; -use delta_kernel::scan::ScanBuilder; -use delta_kernel::schema::{ColumnNamesAndTypes, DataType}; +use delta_kernel::scan::{scan_row_schema, PhysicalPredicate, ScanBuilder, ScanData}; +use delta_kernel::schema::{ColumnNamesAndTypes, DataType, Schema}; use delta_kernel::snapshot::Snapshot as SnapshotInner; -use delta_kernel::{DeltaResult as KernelResult, Engine, Error, Table}; +use delta_kernel::table_properties::TableProperties; +use delta_kernel::{ + DeltaResult as KernelResult, Engine, EngineData, Error, Expression, Table, Version, +}; use futures::{StreamExt, TryStreamExt}; +use itertools::Itertools; use object_store::path::Path; use object_store::ObjectStore; -use tokio::sync::mpsc::channel; +use tracing::warn; +use url::Url; +use crate::kernel::ActionType; use crate::{DeltaResult, DeltaTableConfig, DeltaTableError}; +type ReplayIter = Box, Vec)>>>; + +impl ActionType { + pub(self) fn field_name_unckecked(&self) -> &'static str { + match self { + Self::Metadata => METADATA_NAME, + Self::Protocol => PROTOCOL_NAME, + Self::Remove => REMOVE_NAME, + Self::Add => ADD_NAME, + Self::Txn => SET_TRANSACTION_NAME, + Self::Cdc => CDC_NAME, + _ => panic!(), + } + } + + pub(self) fn field_name(&self) -> DeltaResult<&'static str> { + let name = match self { + Self::Metadata => METADATA_NAME, + Self::Protocol => PROTOCOL_NAME, + Self::Remove => REMOVE_NAME, + Self::Add => ADD_NAME, + Self::Txn => SET_TRANSACTION_NAME, + Self::Cdc => CDC_NAME, + _ => { + return Err(DeltaTableError::generic(format!( + "unsupported action type: {self:?}" + ))) + } + }; + Ok(name) + } +} + +#[derive(Clone)] pub struct Snapshot { inner: Arc, engine: Arc, @@ -39,19 +83,39 @@ impl Snapshot { pub async fn try_new( table: Table, store: Arc, - config: DeltaTableConfig, version: Option, ) -> DeltaResult { + // let executor = Arc::new(TokioMultiThreadExecutor::new( + // config + // .io_runtime + // .map(|rt| rt.get_handle()) + // .unwrap_or(tokio::runtime::Handle::current()), + // )); let executor = Arc::new(TokioMultiThreadExecutor::new( tokio::runtime::Handle::current(), )); let table_root = Path::from_url_path(table.location().path())?; let engine = DefaultEngine::new(store, table_root, executor); - let snapshot = table.snapshot(&engine, None)?; - + let snapshot = table.snapshot(&engine, version.map(|v| v as u64))?; Ok(Self::new(Arc::new(snapshot), Arc::new(engine))) } + pub(crate) fn engine_ref(&self) -> &Arc { + &self.engine + } + + pub fn table_root(&self) -> &Url { + &self.inner.table_root() + } + + pub fn version(&self) -> u64 { + self.inner.version() + } + + pub fn schema(&self) -> &Schema { + self.inner.schema() + } + pub fn protocol(&self) -> &Protocol { self.inner.protocol() } @@ -60,72 +124,190 @@ impl Snapshot { self.inner.metadata() } - pub(crate) fn replay_log(&self) -> DeltaResult<()> { - let log_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME])?; - let actions = self.inner._log_segment().replay( - self.engine.as_ref(), - log_schema.clone(), - log_schema.clone(), - None, - )?; + pub fn table_properties(&self) -> &TableProperties { + &self.inner.table_properties() + } + + /// Get the timestamp of the given version in miliscends since epoch. + /// + /// Extracts the timestamp from the commit file of the given version + /// from the current log segment. If the commit file is not part of the + /// current log segment, `None` is returned. + pub fn version_timestamp(&self, version: Version) -> Option { + self.inner + ._log_segment() + .ascending_commit_files + .iter() + .find(|f| f.version == version) + .map(|f| f.location.last_modified) + } + fn log_data( + &self, + types: &[ActionType], + ) -> DeltaResult>> { + let field_names = types + .iter() + .filter_map(|t| t.field_name().ok()) + .collect::>(); + if field_names.len() != types.len() { + warn!("skipping unsupported action types"); + } + let log_schema = get_log_schema().project(&field_names)?; + Ok(self + .inner + ._log_segment() + .replay( + self.engine.as_ref(), + log_schema.clone(), + log_schema.clone(), + None, + )? + .map_ok(|(d, flag)| { + Ok(( + RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?), + flag, + )) + }) + .flatten()) // let it = scan_action_iter( // engine, // self.replay_for_scan_data(engine)?, // physical_predicate, // ); // Ok(Some(it).into_iter().flatten()) - - Ok(()) } } -enum Action { - Metadata(delta_kernel::actions::Metadata), - Protocol(delta_kernel::actions::Protocol), - Remove(delta_kernel::actions::Remove), - Add(delta_kernel::actions::Add), - SetTransaction(delta_kernel::actions::SetTransaction), - Cdc(delta_kernel::actions::Cdc), +#[derive(Clone)] +pub struct EagerSnapshot { + snapshot: Snapshot, + files: RecordBatch, + actions: Option, } -static NAMES_AND_TYPES: LazyLock = - LazyLock::new(|| get_log_schema().leaves(None)); +impl EagerSnapshot { + /// Create a new [`EagerSnapshot`] instance tracking actions of the given types. + /// + /// Only actions supplied by `tracked_actions` will be loaded into memory. + /// This is useful when only a subset of actions are needed. `Add` and `Remove` actions + /// are treated specially. I.e. `Add` and `Remove` will be loaded as well. + pub async fn try_new_with_actions( + table_root: impl AsRef, + store: Arc, + config: DeltaTableConfig, + version: Option, + tracked_actions: HashSet, + predicate: Option, + ) -> DeltaResult { + let mut replay_actions = Vec::new(); + if config.require_files { + replay_actions.push(ActionType::Add); + replay_actions.push(ActionType::Remove); + } + replay_actions.extend(tracked_actions.into_iter().filter(|it| { + !config.require_files || (it != &ActionType::Add && it != &ActionType::Remove) + })); -struct LogVisitor { - actions: Vec<(Action, usize)>, - offsets: HashMap, - previous_rows_seen: usize, -} + let snapshot = Snapshot::try_new(Table::try_from_uri(table_root)?, store, version).await?; + + let mut replay_data = Vec::new(); + let mut action_data = Vec::new(); + for slice in snapshot.log_data(&replay_actions)? { + let (batch, flag) = slice?; -impl LogVisitor { - fn new() -> LogVisitor { - // Grab the start offset for each top-level column name, then compute the end offset by - // skipping the rest of the leaves for that column. - let mut offsets = HashMap::new(); - let mut it = NAMES_AND_TYPES.as_ref().0.iter().enumerate().peekable(); - while let Some((start, col)) = it.next() { - let mut end = start + 1; - while it.next_if(|(_, other)| col[0] == other[0]).is_some() { - end += 1; + let action_projection = replay_actions + .iter() + .filter_map(|t| { + (t != &ActionType::Add && t != &ActionType::Remove) + .then_some( + t.field_name() + .ok() + .and_then(|n| batch.schema_ref().index_of(n).ok()), + ) + .flatten() + }) + .collect_vec(); + + if !action_projection.is_empty() { + action_data.push(batch.project(&action_projection)?); + } + + if config.require_files { + let file_data = batch.project(&[0, 1])?; + let file_data = filter_record_batch( + &file_data, + ¬(&and( + &is_null(batch.column(0))?, + &is_null(batch.column(1))?, + )?)?, + )?; + replay_data.push(Ok(( + Box::new(ArrowEngineData::from(file_data)) as Box, + flag, + ))); } - offsets.insert(col[0].clone(), (start, end)); - } - LogVisitor { - actions: vec![], - offsets, - previous_rows_seen: 0, } + + let files_schema = Arc::new(get_log_add_schema().as_ref().try_into()?); + let scan_schema = Arc::new((&scan_row_schema()).try_into()?); + + let files = if !replay_data.is_empty() { + let (engine, action_iter) = (snapshot.engine_ref().as_ref(), replay_data.into_iter()); + let physical_predicate = + predicate.and_then(|p| PhysicalPredicate::try_new(&p, snapshot.schema()).ok()); + + let it: ReplayIter = match physical_predicate { + Some(PhysicalPredicate::StaticSkipAll) => Box::new(std::iter::empty()), + Some(PhysicalPredicate::Some(p, s)) => { + Box::new(scan_action_iter(engine, action_iter, Some((p, s)))) + } + None | Some(PhysicalPredicate::None) => { + Box::new(scan_action_iter(engine, action_iter, None)) + } + }; + + let mut filtered = Vec::new(); + for res in it { + let (batch, selection) = res?; + let predicate = BooleanArray::from(selection); + let data: RecordBatch = ArrowEngineData::try_from_engine_data(batch)?.into(); + filtered.push(filter_record_batch(&data, &predicate)?); + } + concat_batches(&scan_schema, &filtered)? + } else { + RecordBatch::new_empty(scan_schema.clone()) + }; + + let actions = (!action_data.is_empty()) + .then(|| concat_batches(&files_schema, &action_data).ok()) + .flatten(); + + Ok(Self { + snapshot, + files, + actions, + }) } -} -impl RowVisitor for LogVisitor { - fn selected_column_names_and_types(&self) -> (&'static [ColumnName], &'static [DataType]) { - todo!() + pub fn version(&self) -> u64 { + self.snapshot.version() } - fn visit<'a>(&mut self, row_count: usize, getters: &[&'a dyn GetData<'a>]) -> KernelResult<()> { - todo!() + pub fn schema(&self) -> &Schema { + self.snapshot.schema() + } + + pub fn protocol(&self) -> &Protocol { + self.snapshot.protocol() + } + + pub fn metadata(&self) -> &delta_kernel::actions::Metadata { + self.snapshot.metadata() + } + + pub fn table_properties(&self) -> &TableProperties { + &self.snapshot.table_properties() } } @@ -133,6 +315,7 @@ impl RowVisitor for LogVisitor { mod tests { use super::*; + use arrow_cast::pretty::print_batches; use deltalake_test::acceptance::{read_dat_case, TestCaseInfo}; use deltalake_test::TestResult; use std::path::PathBuf; @@ -152,7 +335,7 @@ mod tests { async fn load_snapshot() -> TestResult<()> { // some comment let mut dat_dir = get_dat_dir(); - dat_dir.push("basic_append"); + dat_dir.push("multi_partitioned"); let dat_info: TestCaseInfo = read_dat_case(dat_dir)?; let table_info = dat_info.table_summary()?; @@ -161,23 +344,47 @@ mod tests { let snapshot = Snapshot::try_new( table, Arc::new(object_store::local::LocalFileSystem::default()), + None, + ) + .await?; + + assert_eq!(snapshot.version(), table_info.version); + assert_eq!( + snapshot.protocol().min_reader_version(), + table_info.min_reader_version + ); + + Ok(()) + } + + #[tokio::test(flavor = "multi_thread")] + async fn load_eager_snapshot() -> TestResult<()> { + // some comment + let mut dat_dir = get_dat_dir(); + dat_dir.push("multi_partitioned"); + let dat_info: TestCaseInfo = read_dat_case(dat_dir)?; + let table_info = dat_info.table_summary()?; + + let table = Table::try_from_uri(dat_info.table_root()?)?; + + let snapshot = EagerSnapshot::try_new_with_actions( + table.location(), + Arc::new(object_store::local::LocalFileSystem::default()), + Default::default(), + None, Default::default(), None, ) .await?; + assert_eq!(snapshot.version(), table_info.version); assert_eq!( snapshot.protocol().min_reader_version(), table_info.min_reader_version ); - // let table_root = object_store::path::Path::new("s3://delta-rs/test"); - // let store = object_store::ObjectStore::new(&table_root).unwrap(); - // let table = delta::DeltaTable::load(&store, &table_root).await.unwrap(); - // let snapshot = delta::Snapshot::try_new(table_root, table, store, Default::default(), None) - // .await - // .unwrap(); - // snapshot.replay_log().unwrap(); + print_batches(&[snapshot.files])?; + Ok(()) } } diff --git a/crates/core/src/protocol/checkpoints.rs b/crates/core/src/protocol/checkpoints.rs index 72dbec7828..1fada05d38 100644 --- a/crates/core/src/protocol/checkpoints.rs +++ b/crates/core/src/protocol/checkpoints.rs @@ -170,12 +170,7 @@ pub async fn create_checkpoint_for( return Err(CheckpointError::StaleTableVersion(version, state.version()).into()); } - // TODO: checkpoints _can_ be multi-part... haven't actually found a good reference for - // an appropriate split point yet though so only writing a single part currently. - // See https://github.com/delta-io/delta-rs/issues/288 let last_checkpoint_path = log_store.log_path().child("_last_checkpoint"); - - debug!("Writing parquet bytes to checkpoint buffer."); let tombstones = state .unexpired_tombstones(log_store.object_store(None).clone()) .await @@ -291,9 +286,9 @@ fn parquet_bytes_from_state( // and omit metadata columns if at least one remove action has `extended_file_metadata=false`. // We've added the additional check on `size.is_some` because in delta-spark the primitive long type // is used, hence we want to omit possible errors when `extended_file_metadata=true`, but `size=null` - let use_extended_remove_schema = tombstones + let use_extended_remove_schema = !tombstones .iter() - .all(|r| r.extended_file_metadata == Some(true) && r.size.is_some()); + .any(|r| r.extended_file_metadata == Some(false) || r.size.is_none()); // If use_extended_remove_schema=false for some of the tombstones, then it should be for each. if !use_extended_remove_schema { From 0f2c1c465ec9ab090517c4d7f840c81758f2f712 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Mon, 13 Jan 2025 15:43:52 +0100 Subject: [PATCH 03/23] feat: add objectstore with commit file caching Signed-off-by: Robert Pack --- crates/core/Cargo.toml | 5 + crates/core/src/storage/cache.rs | 186 +++++++++++++++++++++++++++++++ crates/core/src/storage/mod.rs | 2 + 3 files changed, 193 insertions(+) create mode 100644 crates/core/src/storage/cache.rs diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index d7143983a7..0bdadfbdab 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -73,6 +73,9 @@ tokio = { workspace = true, features = [ "parking_lot", ] } +# cahce +quick_cache = { version = "0.6.9", optional = true } + # other deps (these should be organized and pulled into workspace.dependencies as necessary) cfg-if = "1" dashmap = "6" @@ -98,6 +101,7 @@ humantime = { version = "2.1.0" } [dev-dependencies] criterion = "0.5" ctor = "0" +datatest-stable = "0.2" deltalake-test = { path = "../test", features = ["datafusion"] } dotenvy = "0" fs_extra = "1.2.0" @@ -126,3 +130,4 @@ datafusion = [ datafusion-ext = ["datafusion"] json = ["parquet/json"] python = ["arrow/pyarrow"] +log-cache = ["quick_cache"] diff --git a/crates/core/src/storage/cache.rs b/crates/core/src/storage/cache.rs new file mode 100644 index 0000000000..4b44da1a78 --- /dev/null +++ b/crates/core/src/storage/cache.rs @@ -0,0 +1,186 @@ +use std::sync::Arc; + +use bytes::Bytes; +use chrono::{DateTime, Utc}; +use futures::stream::BoxStream; +use futures::StreamExt; +use object_store::path::Path; +use object_store::{ + Attributes, GetOptions, GetResult, GetResultPayload, ListResult, MultipartUpload, ObjectMeta, + ObjectStore, PutMultipartOpts, PutOptions, PutPayload, PutResult, Result as ObjectStoreResult, +}; +use quick_cache::sync::Cache; + +#[derive(Debug, Clone)] +struct Entry { + data: Bytes, + last_modified: DateTime, + attributes: Attributes, + e_tag: Option, +} + +impl Entry { + fn new( + data: Bytes, + last_modified: DateTime, + e_tag: Option, + attributes: Attributes, + ) -> Self { + Self { + data, + last_modified, + e_tag, + attributes, + } + } +} + +/// An object store implementation that conditionally caches file requests. +/// +/// This implementation caches the file requests based on on the evaluation +/// of a condition. The condition is evaluated on the path of the file and +/// can be configured to meet the requirements of the user. +/// +/// This is __not__ a general purpose cache and is specifically designed to cache +/// the commit files of a Delta table. E.g. it is assumed that files written to +/// the object store are immutable and no attempt is made to invalidate the cache +/// when files are updated in the remote object store. +#[derive(Clone)] +pub(crate) struct ConditionallyCachedObjectStore { + inner: Arc, + check: Arc bool + Send + Sync>, + cache: Arc>, +} + +impl std::fmt::Debug for ConditionallyCachedObjectStore { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ConditionallyCachedObjectStore") + .field("object_store", &self.inner) + .finish() + } +} + +impl std::fmt::Display for ConditionallyCachedObjectStore { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "ConditionallyCachedObjectStore({})", self.inner) + } +} + +fn cache_json(path: &Path) -> bool { + path.extension() + .map_or(false, |ext| ext.eq_ignore_ascii_case("json")) +} + +impl ConditionallyCachedObjectStore { + /// Create a new conditionally cached object store. + pub fn new(inner: Arc) -> Self { + Self { + inner, + check: Arc::new(cache_json), + cache: Arc::new(Cache::new(100)), + } + } + + async fn get_opts_impl( + &self, + location: &Path, + options: GetOptions, + ) -> ObjectStoreResult { + if options.range.is_some() || !(self.check)(location) || options.head { + return self.inner.get_opts(location, options).await; + } + + let entry = if let Some(entry) = self.cache.get(location) { + entry + } else { + let response = self.inner.get_opts(location, options.clone()).await?; + let attributes = response.attributes.clone(); + let meta = response.meta.clone(); + let data = response.bytes().await?; + let entry = Entry::new(data, meta.last_modified, meta.e_tag, attributes); + self.cache.insert(location.clone(), entry.clone()); + entry + }; + + let meta = ObjectMeta { + location: location.clone(), + last_modified: entry.last_modified, + size: entry.data.len(), + e_tag: entry.e_tag, + version: None, + }; + let (range, data) = (0..entry.data.len(), entry.data); + let stream = futures::stream::once(futures::future::ready(Ok(data))); + Ok(GetResult { + payload: GetResultPayload::Stream(stream.boxed()), + attributes: entry.attributes, + meta, + range, + }) + } +} + +#[async_trait::async_trait] +impl ObjectStore for ConditionallyCachedObjectStore { + async fn put_opts( + &self, + location: &Path, + bytes: PutPayload, + options: PutOptions, + ) -> ObjectStoreResult { + self.inner.put_opts(location, bytes, options).await + } + + async fn get_opts(&self, location: &Path, options: GetOptions) -> ObjectStoreResult { + self.get_opts_impl(location, options).await + } + + async fn head(&self, location: &Path) -> ObjectStoreResult { + self.inner.head(location).await + } + + async fn delete(&self, location: &Path) -> ObjectStoreResult<()> { + self.cache.remove(location); + self.inner.delete(location).await + } + + fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, ObjectStoreResult> { + self.inner.list(prefix) + } + + fn list_with_offset( + &self, + prefix: Option<&Path>, + offset: &Path, + ) -> BoxStream<'_, ObjectStoreResult> { + self.inner.list_with_offset(prefix, offset) + } + + async fn list_with_delimiter(&self, prefix: Option<&Path>) -> ObjectStoreResult { + self.inner.list_with_delimiter(prefix).await + } + + async fn copy(&self, from: &Path, to: &Path) -> ObjectStoreResult<()> { + self.inner.copy(from, to).await + } + + async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> ObjectStoreResult<()> { + self.inner.copy_if_not_exists(from, to).await + } + + async fn rename_if_not_exists(&self, from: &Path, to: &Path) -> ObjectStoreResult<()> { + self.inner.rename_if_not_exists(from, to).await + } + + async fn put_multipart(&self, location: &Path) -> ObjectStoreResult> { + self.inner.put_multipart(location).await + } + + async fn put_multipart_opts( + &self, + location: &Path, + options: PutMultipartOpts, + ) -> ObjectStoreResult> { + self.inner.put_multipart_opts(location, options).await + } +} diff --git a/crates/core/src/storage/mod.rs b/crates/core/src/storage/mod.rs index 8361bf138e..f44e86753d 100644 --- a/crates/core/src/storage/mod.rs +++ b/crates/core/src/storage/mod.rs @@ -30,6 +30,8 @@ pub use retry_ext::ObjectStoreRetryExt; use std::ops::Range; pub use utils::*; +#[cfg(feature = "log-cache")] +pub mod cache; pub mod file; pub mod retry_ext; pub mod utils; From 55565aed18868a5d28a7ff637e6ed24d7e313c79 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Mon, 13 Jan 2025 16:59:27 +0100 Subject: [PATCH 04/23] feat: add owned file view Signed-off-by: Robert Pack --- crates/core/Cargo.toml | 3 +- crates/core/src/kernel/snapshot/next.rs | 221 ++++++++++++++++++++---- crates/core/src/storage/cache.rs | 16 +- 3 files changed, 196 insertions(+), 44 deletions(-) diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index 0bdadfbdab..232cc5f00d 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -74,7 +74,7 @@ tokio = { workspace = true, features = [ ] } # cahce -quick_cache = { version = "0.6.9", optional = true } +quick_cache = { version = "0.6.9" } # other deps (these should be organized and pulled into workspace.dependencies as necessary) cfg-if = "1" @@ -130,4 +130,3 @@ datafusion = [ datafusion-ext = ["datafusion"] json = ["parquet/json"] python = ["arrow/pyarrow"] -log-cache = ["quick_cache"] diff --git a/crates/core/src/kernel/snapshot/next.rs b/crates/core/src/kernel/snapshot/next.rs index 3e7ab01fe2..7e2acbb1de 100644 --- a/crates/core/src/kernel/snapshot/next.rs +++ b/crates/core/src/kernel/snapshot/next.rs @@ -1,43 +1,46 @@ -use std::collections::{HashMap, HashSet}; -use std::sync::{Arc, LazyLock}; +use std::collections::HashSet; +use std::sync::Arc; -use ::serde::{Deserialize, Serialize}; use arrow::compute::{concat_batches, filter_record_batch}; use arrow_arith::boolean::{and, is_null, not}; -use arrow_array::{BooleanArray, RecordBatch}; +use arrow_array::cast::AsArray; +use arrow_array::types::Int64Type; +use arrow_array::{Array, BooleanArray, RecordBatch}; +use chrono::{DateTime, Utc}; +use delta_kernel::actions::set_transaction::{SetTransactionMap, SetTransactionScanner}; use delta_kernel::actions::{ - get_log_add_schema, get_log_schema, Add, ADD_NAME, CDC_NAME, METADATA_NAME, PROTOCOL_NAME, + get_log_add_schema, get_log_schema, ADD_NAME, CDC_NAME, METADATA_NAME, PROTOCOL_NAME, REMOVE_NAME, SET_TRANSACTION_NAME, }; -use delta_kernel::actions::{Metadata, Protocol}; +use delta_kernel::actions::{Metadata, Protocol, SetTransaction}; use delta_kernel::engine::arrow_data::ArrowEngineData; use delta_kernel::engine::default::executor::tokio::{ TokioBackgroundExecutor, TokioMultiThreadExecutor, }; use delta_kernel::engine::default::DefaultEngine; use delta_kernel::engine_data::{GetData, RowVisitor, TypedGetData as _}; -use delta_kernel::expressions::ColumnName; +use delta_kernel::expressions::{Scalar, StructData}; use delta_kernel::scan::log_replay::scan_action_iter; -use delta_kernel::scan::state::{DvInfo, Stats}; -use delta_kernel::scan::{scan_row_schema, PhysicalPredicate, ScanBuilder, ScanData}; -use delta_kernel::schema::{ColumnNamesAndTypes, DataType, Schema}; +use delta_kernel::scan::{scan_row_schema, PhysicalPredicate}; +use delta_kernel::schema::Schema; use delta_kernel::snapshot::Snapshot as SnapshotInner; use delta_kernel::table_properties::TableProperties; -use delta_kernel::{ - DeltaResult as KernelResult, Engine, EngineData, Error, Expression, Table, Version, -}; -use futures::{StreamExt, TryStreamExt}; +use delta_kernel::{DeltaResult as KernelResult, Engine, EngineData, Expression, Table, Version}; use itertools::Itertools; use object_store::path::Path; use object_store::ObjectStore; use tracing::warn; use url::Url; +use crate::kernel::scalars::ScalarExt; use crate::kernel::ActionType; +use crate::storage::cache::CommitCacheObjectStore; use crate::{DeltaResult, DeltaTableConfig, DeltaTableError}; type ReplayIter = Box, Vec)>>>; +type LocalFileSystem = CommitCacheObjectStore; + impl ActionType { pub(self) fn field_name_unckecked(&self) -> &'static str { match self { @@ -76,28 +79,42 @@ pub struct Snapshot { } impl Snapshot { + /// Create a new [`Snapshot`] instance. pub fn new(inner: Arc, engine: Arc) -> Self { Self { inner, engine } } + /// Create a new [`Snapshot`] instance for a table. pub async fn try_new( table: Table, store: Arc, version: Option, ) -> DeltaResult { - // let executor = Arc::new(TokioMultiThreadExecutor::new( - // config - // .io_runtime - // .map(|rt| rt.get_handle()) - // .unwrap_or(tokio::runtime::Handle::current()), - // )); - let executor = Arc::new(TokioMultiThreadExecutor::new( - tokio::runtime::Handle::current(), - )); + // TODO: how to deal with the dedicated IO runtime? Would this already be covered by the + // object store implementation pass to this? let table_root = Path::from_url_path(table.location().path())?; - let engine = DefaultEngine::new(store, table_root, executor); - let snapshot = table.snapshot(&engine, version.map(|v| v as u64))?; - Ok(Self::new(Arc::new(snapshot), Arc::new(engine))) + let store_str = format!("{}", store); + let is_local = store_str.starts_with("LocalFileSystem"); + let store = Arc::new(CommitCacheObjectStore::new(store)); + let handle = tokio::runtime::Handle::current(); + let engine: Arc = match handle.runtime_flavor() { + tokio::runtime::RuntimeFlavor::MultiThread => Arc::new(DefaultEngine::new_with_opts( + store, + table_root, + Arc::new(TokioMultiThreadExecutor::new(handle)), + !is_local, + )), + tokio::runtime::RuntimeFlavor::CurrentThread => Arc::new(DefaultEngine::new_with_opts( + store, + table_root, + Arc::new(TokioBackgroundExecutor::new()), + !is_local, + )), + _ => return Err(DeltaTableError::generic("unsupported runtime flavor")), + }; + + let snapshot = table.snapshot(engine.as_ref(), version.map(|v| v as u64))?; + Ok(Self::new(Arc::new(snapshot), engine)) } pub(crate) fn engine_ref(&self) -> &Arc { @@ -120,7 +137,7 @@ impl Snapshot { self.inner.protocol() } - pub fn metadata(&self) -> &delta_kernel::actions::Metadata { + pub fn metadata(&self) -> &Metadata { self.inner.metadata() } @@ -142,6 +159,28 @@ impl Snapshot { .map(|f| f.location.last_modified) } + /// Scan the Delta Log to obtain the latest transaction for all applications + /// + /// This method requires a full scan of the log to find all transactions. + /// When a specific application id is requested, it is much more efficient to use + /// [`application_transaction`](Self::application_transaction) instead. + pub fn application_transactions(&self) -> DeltaResult { + let scanner = SetTransactionScanner::new(self.inner.clone()); + Ok(scanner.application_transactions(self.engine.as_ref())?) + } + + /// Scan the Delta Log for the latest transaction entry for a specific application. + /// + /// Initiates a log scan, but terminates as soon as the transaction + /// for the given application is found. + pub fn application_transaction( + &self, + app_id: impl AsRef, + ) -> DeltaResult> { + let scanner = SetTransactionScanner::new(self.inner.clone()); + Ok(scanner.application_transaction(self.engine.as_ref(), app_id.as_ref())?) + } + fn log_data( &self, types: &[ActionType], @@ -170,12 +209,6 @@ impl Snapshot { )) }) .flatten()) - // let it = scan_action_iter( - // engine, - // self.replay_for_scan_data(engine)?, - // physical_predicate, - // ); - // Ok(Some(it).into_iter().flatten()) } } @@ -309,6 +342,109 @@ impl EagerSnapshot { pub fn table_properties(&self) -> &TableProperties { &self.snapshot.table_properties() } + + pub fn files(&self) -> impl Iterator { + LogicalFileView { + files: self.files.clone(), + index: 0, + } + } + + /// Get the number of files in the current snapshot + pub fn files_count(&self) -> usize { + self.files.num_rows() + } +} + +/// Helper trait to extract individual values from a `StructData`. +pub trait StructDataExt { + fn get(&self, key: &str) -> Option<&Scalar>; +} + +impl StructDataExt for StructData { + fn get(&self, key: &str) -> Option<&Scalar> { + self.fields() + .iter() + .zip(self.values().iter()) + .find(|(k, _)| k.name() == key) + .map(|(_, v)| v) + } +} + +#[derive(Clone)] +pub struct LogicalFileView { + files: RecordBatch, + index: usize, +} + +impl LogicalFileView { + /// Path of the file. + pub fn path(&self) -> &str { + self.files.column(0).as_string::().value(self.index) + } + + /// Size of the file in bytes. + pub fn size(&self) -> i64 { + self.files + .column(1) + .as_primitive::() + .value(self.index) + } + + /// Modification time of the file in milliseconds since epoch. + pub fn modification_time(&self) -> i64 { + self.files + .column(2) + .as_primitive::() + .value(self.index) + } + + /// Datetime of the last modification time of the file. + pub fn modification_datetime(&self) -> DeltaResult> { + DateTime::from_timestamp_millis(self.modification_time()).ok_or(DeltaTableError::from( + crate::protocol::ProtocolError::InvalidField(format!( + "invalid modification_time: {:?}", + self.modification_time() + )), + )) + } + + pub fn stats(&self) -> Option<&str> { + let col = self.files.column(3).as_string::(); + col.is_valid(self.index).then(|| col.value(self.index)) + } + + pub fn partition_values(&self) -> Option { + self.files + .column_by_name("fileConstantValues") + .and_then(|col| col.as_struct_opt()) + .and_then(|s| s.column_by_name("partitionValues")) + .and_then(|arr| { + arr.is_valid(self.index) + .then(|| match Scalar::from_array(arr, self.index) { + Some(Scalar::Struct(s)) => Some(s), + _ => None, + }) + .flatten() + }) + } +} + +impl Iterator for LogicalFileView { + type Item = LogicalFileView; + + fn next(&mut self) -> Option { + if self.index < self.files.num_rows() { + let file = LogicalFileView { + files: self.files.clone(), + index: self.index, + }; + self.index += 1; + Some(file) + } else { + None + } + } } #[cfg(test)] @@ -331,11 +467,11 @@ mod tests { rep_root } - #[tokio::test(flavor = "multi_thread")] async fn load_snapshot() -> TestResult<()> { // some comment let mut dat_dir = get_dat_dir(); dat_dir.push("multi_partitioned"); + let dat_info: TestCaseInfo = read_dat_case(dat_dir)?; let table_info = dat_info.table_summary()?; @@ -350,14 +486,27 @@ mod tests { assert_eq!(snapshot.version(), table_info.version); assert_eq!( - snapshot.protocol().min_reader_version(), - table_info.min_reader_version + ( + snapshot.protocol().min_reader_version(), + snapshot.protocol().min_writer_version() + ), + (table_info.min_reader_version, table_info.min_writer_version) ); Ok(()) } #[tokio::test(flavor = "multi_thread")] + async fn load_snapshot_multi() -> TestResult<()> { + load_snapshot().await + } + + #[tokio::test(flavor = "current_thread")] + async fn load_snapshot_current() -> TestResult<()> { + load_snapshot().await + } + + #[tokio::test] async fn load_eager_snapshot() -> TestResult<()> { // some comment let mut dat_dir = get_dat_dir(); diff --git a/crates/core/src/storage/cache.rs b/crates/core/src/storage/cache.rs index 4b44da1a78..eb6b5bd785 100644 --- a/crates/core/src/storage/cache.rs +++ b/crates/core/src/storage/cache.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use bytes::Bytes; use chrono::{DateTime, Utc}; use futures::stream::BoxStream; -use futures::StreamExt; +use futures::{StreamExt, TryStreamExt}; use object_store::path::Path; use object_store::{ Attributes, GetOptions, GetResult, GetResultPayload, ListResult, MultipartUpload, ObjectMeta, @@ -46,13 +46,14 @@ impl Entry { /// the object store are immutable and no attempt is made to invalidate the cache /// when files are updated in the remote object store. #[derive(Clone)] -pub(crate) struct ConditionallyCachedObjectStore { +pub(crate) struct CommitCacheObjectStore { inner: Arc, check: Arc bool + Send + Sync>, cache: Arc>, + has_ordered_listing: bool, } -impl std::fmt::Debug for ConditionallyCachedObjectStore { +impl std::fmt::Debug for CommitCacheObjectStore { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("ConditionallyCachedObjectStore") .field("object_store", &self.inner) @@ -60,7 +61,7 @@ impl std::fmt::Debug for ConditionallyCachedObjectStore { } } -impl std::fmt::Display for ConditionallyCachedObjectStore { +impl std::fmt::Display for CommitCacheObjectStore { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "ConditionallyCachedObjectStore({})", self.inner) } @@ -71,13 +72,16 @@ fn cache_json(path: &Path) -> bool { .map_or(false, |ext| ext.eq_ignore_ascii_case("json")) } -impl ConditionallyCachedObjectStore { +impl CommitCacheObjectStore { /// Create a new conditionally cached object store. pub fn new(inner: Arc) -> Self { + let store_str = format!("{}", inner); + let is_local = store_str.starts_with("LocalFileSystem"); Self { inner, check: Arc::new(cache_json), cache: Arc::new(Cache::new(100)), + has_ordered_listing: !is_local, } } @@ -121,7 +125,7 @@ impl ConditionallyCachedObjectStore { } #[async_trait::async_trait] -impl ObjectStore for ConditionallyCachedObjectStore { +impl ObjectStore for CommitCacheObjectStore { async fn put_opts( &self, location: &Path, From 3d6d263ed3b7925384f7ad526ba10bc484beedd3 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Thu, 16 Jan 2025 01:58:37 +0100 Subject: [PATCH 05/23] feat: basic updates of file state Signed-off-by: Robert Pack --- crates/core/src/kernel/snapshot/next.rs | 371 +++++++++++++++--------- crates/core/src/operations/vacuum.rs | 1 + crates/core/src/protocol/checkpoints.rs | 2 +- crates/core/src/storage/mod.rs | 3 +- 4 files changed, 231 insertions(+), 146 deletions(-) diff --git a/crates/core/src/kernel/snapshot/next.rs b/crates/core/src/kernel/snapshot/next.rs index 7e2acbb1de..56127d21c0 100644 --- a/crates/core/src/kernel/snapshot/next.rs +++ b/crates/core/src/kernel/snapshot/next.rs @@ -1,13 +1,19 @@ +//! Snapshot of a Delta Table at a specific version. +//! use std::collections::HashSet; -use std::sync::Arc; +use std::sync::{Arc, LazyLock}; use arrow::compute::{concat_batches, filter_record_batch}; use arrow_arith::boolean::{and, is_null, not}; use arrow_array::cast::AsArray; use arrow_array::types::Int64Type; use arrow_array::{Array, BooleanArray, RecordBatch}; +use arrow_cast::pretty::print_batches; use chrono::{DateTime, Utc}; use delta_kernel::actions::set_transaction::{SetTransactionMap, SetTransactionScanner}; +use delta_kernel::actions::visitors::{ + AddVisitor, CdcVisitor, MetadataVisitor, ProtocolVisitor, RemoveVisitor, SetTransactionVisitor, +}; use delta_kernel::actions::{ get_log_add_schema, get_log_schema, ADD_NAME, CDC_NAME, METADATA_NAME, PROTOCOL_NAME, REMOVE_NAME, SET_TRANSACTION_NAME, @@ -20,12 +26,16 @@ use delta_kernel::engine::default::executor::tokio::{ use delta_kernel::engine::default::DefaultEngine; use delta_kernel::engine_data::{GetData, RowVisitor, TypedGetData as _}; use delta_kernel::expressions::{Scalar, StructData}; +use delta_kernel::log_segment::LogSegment; use delta_kernel::scan::log_replay::scan_action_iter; -use delta_kernel::scan::{scan_row_schema, PhysicalPredicate}; -use delta_kernel::schema::Schema; +use delta_kernel::scan::scan_row_schema; +use delta_kernel::schema::{DataType, Schema, StructField, StructType}; use delta_kernel::snapshot::Snapshot as SnapshotInner; use delta_kernel::table_properties::TableProperties; -use delta_kernel::{DeltaResult as KernelResult, Engine, EngineData, Expression, Table, Version}; +use delta_kernel::{ + DeltaResult as KernelResult, Engine, EngineData, Expression, ExpressionHandler, ExpressionRef, + Table, Version, +}; use itertools::Itertools; use object_store::path::Path; use object_store::ObjectStore; @@ -33,7 +43,7 @@ use tracing::warn; use url::Url; use crate::kernel::scalars::ScalarExt; -use crate::kernel::ActionType; +use crate::kernel::{ActionType, ARROW_HANDLER}; use crate::storage::cache::CommitCacheObjectStore; use crate::{DeltaResult, DeltaTableConfig, DeltaTableError}; @@ -41,6 +51,26 @@ type ReplayIter = Box, Vec type LocalFileSystem = CommitCacheObjectStore; +#[derive(thiserror::Error, Debug)] +enum SnapshotError { + #[error("Snapshot not initialized for action type: {0}")] + MissingData(String), +} + +impl SnapshotError { + fn missing_data(action: ActionType) -> Self { + Self::MissingData(action.field_name_unckecked().to_string()) + } +} + +impl From for DeltaTableError { + fn from(e: SnapshotError) -> Self { + match &e { + SnapshotError::MissingData(_) => DeltaTableError::generic(e), + } + } +} + impl ActionType { pub(self) fn field_name_unckecked(&self) -> &'static str { match self { @@ -88,7 +118,7 @@ impl Snapshot { pub async fn try_new( table: Table, store: Arc, - version: Option, + version: impl Into>, ) -> DeltaResult { // TODO: how to deal with the dedicated IO runtime? Would this already be covered by the // object store implementation pass to this? @@ -113,7 +143,7 @@ impl Snapshot { _ => return Err(DeltaTableError::generic("unsupported runtime flavor")), }; - let snapshot = table.snapshot(engine.as_ref(), version.map(|v| v as u64))?; + let snapshot = table.snapshot(engine.as_ref(), version.into())?; Ok(Self::new(Arc::new(snapshot), engine)) } @@ -125,7 +155,7 @@ impl Snapshot { &self.inner.table_root() } - pub fn version(&self) -> u64 { + pub fn version(&self) -> Version { self.inner.version() } @@ -159,6 +189,49 @@ impl Snapshot { .map(|f| f.location.last_modified) } + /// read all active files from the log + pub(crate) fn files( + &self, + predicate: Option>, + ) -> DeltaResult>> { + let scan = self + .inner + .clone() + .scan_builder() + .with_predicate(predicate) + .build()?; + Ok(scan.scan_data(self.engine.as_ref())?.map(|res| { + res.and_then(|(data, mut predicate)| { + let batch: RecordBatch = ArrowEngineData::try_from_engine_data(data)?.into(); + if predicate.len() < batch.num_rows() { + predicate + .extend(std::iter::repeat(true).take(batch.num_rows() - predicate.len())); + } + Ok(filter_record_batch(&batch, &BooleanArray::from(predicate))?) + }) + })) + } + + pub(crate) fn tombstones(&self) -> DeltaResult>> { + static META_PREDICATE: LazyLock> = LazyLock::new(|| { + Some(Arc::new( + Expression::column([REMOVE_NAME, "path"]).is_not_null(), + )) + }); + let read_schema = get_log_schema().project(&[REMOVE_NAME])?; + Ok(self + .inner + ._log_segment() + .replay( + self.engine.as_ref(), + read_schema.clone(), + read_schema, + META_PREDICATE.clone(), + )? + .map_ok(|(d, _)| Ok(RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?))) + .flatten()) + } + /// Scan the Delta Log to obtain the latest transaction for all applications /// /// This method requires a full scan of the log to find all transactions. @@ -180,43 +253,13 @@ impl Snapshot { let scanner = SetTransactionScanner::new(self.inner.clone()); Ok(scanner.application_transaction(self.engine.as_ref(), app_id.as_ref())?) } - - fn log_data( - &self, - types: &[ActionType], - ) -> DeltaResult>> { - let field_names = types - .iter() - .filter_map(|t| t.field_name().ok()) - .collect::>(); - if field_names.len() != types.len() { - warn!("skipping unsupported action types"); - } - let log_schema = get_log_schema().project(&field_names)?; - Ok(self - .inner - ._log_segment() - .replay( - self.engine.as_ref(), - log_schema.clone(), - log_schema.clone(), - None, - )? - .map_ok(|(d, flag)| { - Ok(( - RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?), - flag, - )) - }) - .flatten()) - } } #[derive(Clone)] pub struct EagerSnapshot { snapshot: Snapshot, - files: RecordBatch, - actions: Option, + files: Option, + predicate: Option>, } impl EagerSnapshot { @@ -229,101 +272,23 @@ impl EagerSnapshot { table_root: impl AsRef, store: Arc, config: DeltaTableConfig, - version: Option, + version: impl Into>, tracked_actions: HashSet, - predicate: Option, + predicate: Option>, ) -> DeltaResult { - let mut replay_actions = Vec::new(); - if config.require_files { - replay_actions.push(ActionType::Add); - replay_actions.push(ActionType::Remove); - } - replay_actions.extend(tracked_actions.into_iter().filter(|it| { - !config.require_files || (it != &ActionType::Add && it != &ActionType::Remove) - })); - let snapshot = Snapshot::try_new(Table::try_from_uri(table_root)?, store, version).await?; - - let mut replay_data = Vec::new(); - let mut action_data = Vec::new(); - for slice in snapshot.log_data(&replay_actions)? { - let (batch, flag) = slice?; - - let action_projection = replay_actions - .iter() - .filter_map(|t| { - (t != &ActionType::Add && t != &ActionType::Remove) - .then_some( - t.field_name() - .ok() - .and_then(|n| batch.schema_ref().index_of(n).ok()), - ) - .flatten() - }) - .collect_vec(); - - if !action_projection.is_empty() { - action_data.push(batch.project(&action_projection)?); - } - - if config.require_files { - let file_data = batch.project(&[0, 1])?; - let file_data = filter_record_batch( - &file_data, - ¬(&and( - &is_null(batch.column(0))?, - &is_null(batch.column(1))?, - )?)?, - )?; - replay_data.push(Ok(( - Box::new(ArrowEngineData::from(file_data)) as Box, - flag, - ))); - } - } - - let files_schema = Arc::new(get_log_add_schema().as_ref().try_into()?); - let scan_schema = Arc::new((&scan_row_schema()).try_into()?); - - let files = if !replay_data.is_empty() { - let (engine, action_iter) = (snapshot.engine_ref().as_ref(), replay_data.into_iter()); - let physical_predicate = - predicate.and_then(|p| PhysicalPredicate::try_new(&p, snapshot.schema()).ok()); - - let it: ReplayIter = match physical_predicate { - Some(PhysicalPredicate::StaticSkipAll) => Box::new(std::iter::empty()), - Some(PhysicalPredicate::Some(p, s)) => { - Box::new(scan_action_iter(engine, action_iter, Some((p, s)))) - } - None | Some(PhysicalPredicate::None) => { - Box::new(scan_action_iter(engine, action_iter, None)) - } - }; - - let mut filtered = Vec::new(); - for res in it { - let (batch, selection) = res?; - let predicate = BooleanArray::from(selection); - let data: RecordBatch = ArrowEngineData::try_from_engine_data(batch)?.into(); - filtered.push(filter_record_batch(&data, &predicate)?); - } - concat_batches(&scan_schema, &filtered)? - } else { - RecordBatch::new_empty(scan_schema.clone()) - }; - - let actions = (!action_data.is_empty()) - .then(|| concat_batches(&files_schema, &action_data).ok()) - .flatten(); - + let files = config + .require_files + .then(|| -> DeltaResult<_> { Ok(replay_file_actions(&snapshot)?) }) + .transpose()?; Ok(Self { snapshot, files, - actions, + predicate, }) } - pub fn version(&self) -> u64 { + pub fn version(&self) -> Version { self.snapshot.version() } @@ -335,7 +300,7 @@ impl EagerSnapshot { self.snapshot.protocol() } - pub fn metadata(&self) -> &delta_kernel::actions::Metadata { + pub fn metadata(&self) -> &Metadata { self.snapshot.metadata() } @@ -343,17 +308,138 @@ impl EagerSnapshot { &self.snapshot.table_properties() } - pub fn files(&self) -> impl Iterator { - LogicalFileView { - files: self.files.clone(), + pub fn files(&self) -> DeltaResult> { + Ok(LogicalFileView { + files: self + .files + .clone() + .ok_or_else(|| SnapshotError::missing_data(ActionType::Add))?, index: 0, - } + }) } /// Get the number of files in the current snapshot - pub fn files_count(&self) -> usize { - self.files.num_rows() + pub fn files_count(&self) -> DeltaResult { + Ok(self + .files + .as_ref() + .map(|f| f.num_rows()) + .ok_or_else(|| SnapshotError::missing_data(ActionType::Add))?) } + + pub fn tombstones(&self) -> DeltaResult>> { + self.snapshot.tombstones() + } + + /// Scan the Delta Log to obtain the latest transaction for all applications + /// + /// This method requires a full scan of the log to find all transactions. + /// When a specific application id is requested, it is much more efficient to use + /// [`application_transaction`](Self::application_transaction) instead. + pub fn application_transactions(&self) -> DeltaResult { + self.snapshot.application_transactions() + } + + /// Scan the Delta Log for the latest transaction entry for a specific application. + /// + /// Initiates a log scan, but terminates as soon as the transaction + /// for the given application is found. + pub fn application_transaction( + &self, + app_id: impl AsRef, + ) -> DeltaResult> { + self.snapshot.application_transaction(app_id) + } + + pub(crate) fn update(&mut self) -> DeltaResult<()> { + let state = self + .files + .as_ref() + .ok_or(SnapshotError::missing_data(ActionType::Add))? + .clone(); + + let log_root = self.snapshot.table_root().join("_delta_log/").unwrap(); + let fs_client = self.snapshot.engine.get_file_system_client(); + let commit_read_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME])?; + let checkpoint_read_schema = get_log_add_schema().clone(); + + let segment = + LogSegment::for_table_changes(fs_client.as_ref(), log_root, self.version() + 1, None)?; + let slice_iter = segment + .replay( + self.snapshot.engine.as_ref(), + commit_read_schema, + checkpoint_read_schema, + None, + )? + .chain(std::iter::once(Ok(( + Box::new(ArrowEngineData::from(state)) as Box, + false, + )))); + + let res = scan_action_iter(self.snapshot.engine.as_ref(), slice_iter, None) + .map(|res| { + res.and_then(|(d, sel)| { + let batch = RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?); + Ok(filter_record_batch(&batch, &BooleanArray::from(sel))?) + }) + }) + .collect::, _>>()?; + + self.files = Some(concat_batches(res[0].schema_ref(), &res)?); + + Ok(()) + } +} + +fn replay_file_actions(snapshot: &Snapshot) -> DeltaResult { + let commit_read_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME])?; + let checkpoint_read_schema = get_log_add_schema().clone(); + + let curr_data = snapshot + .inner + ._log_segment() + .replay( + snapshot.engine.as_ref(), + commit_read_schema.clone(), + checkpoint_read_schema.clone(), + None, + )? + .map_ok( + |(data, flag)| -> Result<(RecordBatch, bool), delta_kernel::Error> { + Ok((ArrowEngineData::try_from_engine_data(data)?.into(), flag)) + }, + ) + .flatten() + .collect::, _>>()?; + + let scan_iter = curr_data.clone().into_iter().map(|(data, flag)| { + Ok(( + Box::new(ArrowEngineData::new(data.clone())) as Box, + flag, + )) + }); + + let res = scan_action_iter(snapshot.engine.as_ref(), scan_iter, None) + .map(|res| { + res.and_then(|(d, selection)| { + Ok(( + RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?), + selection, + )) + }) + }) + .zip(curr_data.into_iter()) + .map(|(scan_res, (data_raw, _))| match scan_res { + Ok((_, selection)) => { + let data = filter_record_batch(&data_raw, &BooleanArray::from(selection))?; + Ok(data.project(&[0])?) + } + Err(e) => Err(e), + }) + .collect::, _>>()?; + + Ok(concat_batches(res[0].schema_ref(), &res)?) } /// Helper trait to extract individual values from a `StructData`. @@ -451,7 +537,6 @@ impl Iterator for LogicalFileView { mod tests { use super::*; - use arrow_cast::pretty::print_batches; use deltalake_test::acceptance::{read_dat_case, TestCaseInfo}; use deltalake_test::TestResult; use std::path::PathBuf; @@ -508,31 +593,31 @@ mod tests { #[tokio::test] async fn load_eager_snapshot() -> TestResult<()> { - // some comment let mut dat_dir = get_dat_dir(); dat_dir.push("multi_partitioned"); + let dat_info: TestCaseInfo = read_dat_case(dat_dir)?; let table_info = dat_info.table_summary()?; let table = Table::try_from_uri(dat_info.table_root()?)?; - let snapshot = EagerSnapshot::try_new_with_actions( + let mut snapshot = EagerSnapshot::try_new_with_actions( table.location(), Arc::new(object_store::local::LocalFileSystem::default()), Default::default(), - None, + Some(1), Default::default(), None, ) .await?; - assert_eq!(snapshot.version(), table_info.version); - assert_eq!( - snapshot.protocol().min_reader_version(), - table_info.min_reader_version - ); + // assert_eq!(snapshot.version(), table_info.version); + // assert_eq!( + // snapshot.protocol().min_reader_version(), + // table_info.min_reader_version + // ); - print_batches(&[snapshot.files])?; + snapshot.update()?; Ok(()) } diff --git a/crates/core/src/operations/vacuum.rs b/crates/core/src/operations/vacuum.rs index 4e5c46589f..9fd614e83e 100644 --- a/crates/core/src/operations/vacuum.rs +++ b/crates/core/src/operations/vacuum.rs @@ -217,6 +217,7 @@ impl VacuumBuilder { self.log_store.object_store(None).clone(), ) .await?; + let valid_files = self.snapshot.file_paths_iter().collect::>(); let mut files_to_delete = vec![]; diff --git a/crates/core/src/protocol/checkpoints.rs b/crates/core/src/protocol/checkpoints.rs index 1fada05d38..f6ebacff99 100644 --- a/crates/core/src/protocol/checkpoints.rs +++ b/crates/core/src/protocol/checkpoints.rs @@ -174,7 +174,7 @@ pub async fn create_checkpoint_for( let tombstones = state .unexpired_tombstones(log_store.object_store(None).clone()) .await - .map_err(|_| ProtocolError::Generic("filed to get tombstones".into()))? + .map_err(|_| ProtocolError::Generic("failed to get tombstones".into()))? .collect::>(); let (checkpoint, parquet_bytes) = parquet_bytes_from_state(state, tombstones)?; diff --git a/crates/core/src/storage/mod.rs b/crates/core/src/storage/mod.rs index f44e86753d..31f4f60c77 100644 --- a/crates/core/src/storage/mod.rs +++ b/crates/core/src/storage/mod.rs @@ -30,8 +30,7 @@ pub use retry_ext::ObjectStoreRetryExt; use std::ops::Range; pub use utils::*; -#[cfg(feature = "log-cache")] -pub mod cache; +pub(crate) mod cache; pub mod file; pub mod retry_ext; pub mod utils; From a5672b58e55b2f907cc78ef4f7dd21e6b2023315 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Thu, 16 Jan 2025 14:17:43 +0100 Subject: [PATCH 06/23] feat: introduce snapshot trait Signed-off-by: Robert Pack --- crates/core/src/kernel/mod.rs | 1 + crates/core/src/kernel/snapshot/mod.rs | 1 - crates/core/src/kernel/snapshot/next.rs | 624 ------------------ .../snapshot_next}/cache.rs | 8 +- crates/core/src/kernel/snapshot_next/eager.rs | 263 ++++++++ .../src/kernel/snapshot_next/iterators.rs | 310 +++++++++ crates/core/src/kernel/snapshot_next/lazy.rs | 229 +++++++ crates/core/src/kernel/snapshot_next/mod.rs | 161 +++++ crates/core/src/storage/mod.rs | 1 - 9 files changed, 966 insertions(+), 632 deletions(-) delete mode 100644 crates/core/src/kernel/snapshot/next.rs rename crates/core/src/{storage => kernel/snapshot_next}/cache.rs (95%) create mode 100644 crates/core/src/kernel/snapshot_next/eager.rs create mode 100644 crates/core/src/kernel/snapshot_next/iterators.rs create mode 100644 crates/core/src/kernel/snapshot_next/lazy.rs create mode 100644 crates/core/src/kernel/snapshot_next/mod.rs diff --git a/crates/core/src/kernel/mod.rs b/crates/core/src/kernel/mod.rs index b2fcd71634..0a51630ee5 100644 --- a/crates/core/src/kernel/mod.rs +++ b/crates/core/src/kernel/mod.rs @@ -10,6 +10,7 @@ pub mod error; pub mod models; pub mod scalars; mod snapshot; +pub mod snapshot_next; pub use error::*; pub use models::*; diff --git a/crates/core/src/kernel/snapshot/mod.rs b/crates/core/src/kernel/snapshot/mod.rs index 38d1dc570d..2938b3d3db 100644 --- a/crates/core/src/kernel/snapshot/mod.rs +++ b/crates/core/src/kernel/snapshot/mod.rs @@ -45,7 +45,6 @@ pub use self::log_data::*; mod log_data; pub(crate) mod log_segment; -mod next; pub(crate) mod parse; mod replay; mod serde; diff --git a/crates/core/src/kernel/snapshot/next.rs b/crates/core/src/kernel/snapshot/next.rs deleted file mode 100644 index 56127d21c0..0000000000 --- a/crates/core/src/kernel/snapshot/next.rs +++ /dev/null @@ -1,624 +0,0 @@ -//! Snapshot of a Delta Table at a specific version. -//! -use std::collections::HashSet; -use std::sync::{Arc, LazyLock}; - -use arrow::compute::{concat_batches, filter_record_batch}; -use arrow_arith::boolean::{and, is_null, not}; -use arrow_array::cast::AsArray; -use arrow_array::types::Int64Type; -use arrow_array::{Array, BooleanArray, RecordBatch}; -use arrow_cast::pretty::print_batches; -use chrono::{DateTime, Utc}; -use delta_kernel::actions::set_transaction::{SetTransactionMap, SetTransactionScanner}; -use delta_kernel::actions::visitors::{ - AddVisitor, CdcVisitor, MetadataVisitor, ProtocolVisitor, RemoveVisitor, SetTransactionVisitor, -}; -use delta_kernel::actions::{ - get_log_add_schema, get_log_schema, ADD_NAME, CDC_NAME, METADATA_NAME, PROTOCOL_NAME, - REMOVE_NAME, SET_TRANSACTION_NAME, -}; -use delta_kernel::actions::{Metadata, Protocol, SetTransaction}; -use delta_kernel::engine::arrow_data::ArrowEngineData; -use delta_kernel::engine::default::executor::tokio::{ - TokioBackgroundExecutor, TokioMultiThreadExecutor, -}; -use delta_kernel::engine::default::DefaultEngine; -use delta_kernel::engine_data::{GetData, RowVisitor, TypedGetData as _}; -use delta_kernel::expressions::{Scalar, StructData}; -use delta_kernel::log_segment::LogSegment; -use delta_kernel::scan::log_replay::scan_action_iter; -use delta_kernel::scan::scan_row_schema; -use delta_kernel::schema::{DataType, Schema, StructField, StructType}; -use delta_kernel::snapshot::Snapshot as SnapshotInner; -use delta_kernel::table_properties::TableProperties; -use delta_kernel::{ - DeltaResult as KernelResult, Engine, EngineData, Expression, ExpressionHandler, ExpressionRef, - Table, Version, -}; -use itertools::Itertools; -use object_store::path::Path; -use object_store::ObjectStore; -use tracing::warn; -use url::Url; - -use crate::kernel::scalars::ScalarExt; -use crate::kernel::{ActionType, ARROW_HANDLER}; -use crate::storage::cache::CommitCacheObjectStore; -use crate::{DeltaResult, DeltaTableConfig, DeltaTableError}; - -type ReplayIter = Box, Vec)>>>; - -type LocalFileSystem = CommitCacheObjectStore; - -#[derive(thiserror::Error, Debug)] -enum SnapshotError { - #[error("Snapshot not initialized for action type: {0}")] - MissingData(String), -} - -impl SnapshotError { - fn missing_data(action: ActionType) -> Self { - Self::MissingData(action.field_name_unckecked().to_string()) - } -} - -impl From for DeltaTableError { - fn from(e: SnapshotError) -> Self { - match &e { - SnapshotError::MissingData(_) => DeltaTableError::generic(e), - } - } -} - -impl ActionType { - pub(self) fn field_name_unckecked(&self) -> &'static str { - match self { - Self::Metadata => METADATA_NAME, - Self::Protocol => PROTOCOL_NAME, - Self::Remove => REMOVE_NAME, - Self::Add => ADD_NAME, - Self::Txn => SET_TRANSACTION_NAME, - Self::Cdc => CDC_NAME, - _ => panic!(), - } - } - - pub(self) fn field_name(&self) -> DeltaResult<&'static str> { - let name = match self { - Self::Metadata => METADATA_NAME, - Self::Protocol => PROTOCOL_NAME, - Self::Remove => REMOVE_NAME, - Self::Add => ADD_NAME, - Self::Txn => SET_TRANSACTION_NAME, - Self::Cdc => CDC_NAME, - _ => { - return Err(DeltaTableError::generic(format!( - "unsupported action type: {self:?}" - ))) - } - }; - Ok(name) - } -} - -#[derive(Clone)] -pub struct Snapshot { - inner: Arc, - engine: Arc, -} - -impl Snapshot { - /// Create a new [`Snapshot`] instance. - pub fn new(inner: Arc, engine: Arc) -> Self { - Self { inner, engine } - } - - /// Create a new [`Snapshot`] instance for a table. - pub async fn try_new( - table: Table, - store: Arc, - version: impl Into>, - ) -> DeltaResult { - // TODO: how to deal with the dedicated IO runtime? Would this already be covered by the - // object store implementation pass to this? - let table_root = Path::from_url_path(table.location().path())?; - let store_str = format!("{}", store); - let is_local = store_str.starts_with("LocalFileSystem"); - let store = Arc::new(CommitCacheObjectStore::new(store)); - let handle = tokio::runtime::Handle::current(); - let engine: Arc = match handle.runtime_flavor() { - tokio::runtime::RuntimeFlavor::MultiThread => Arc::new(DefaultEngine::new_with_opts( - store, - table_root, - Arc::new(TokioMultiThreadExecutor::new(handle)), - !is_local, - )), - tokio::runtime::RuntimeFlavor::CurrentThread => Arc::new(DefaultEngine::new_with_opts( - store, - table_root, - Arc::new(TokioBackgroundExecutor::new()), - !is_local, - )), - _ => return Err(DeltaTableError::generic("unsupported runtime flavor")), - }; - - let snapshot = table.snapshot(engine.as_ref(), version.into())?; - Ok(Self::new(Arc::new(snapshot), engine)) - } - - pub(crate) fn engine_ref(&self) -> &Arc { - &self.engine - } - - pub fn table_root(&self) -> &Url { - &self.inner.table_root() - } - - pub fn version(&self) -> Version { - self.inner.version() - } - - pub fn schema(&self) -> &Schema { - self.inner.schema() - } - - pub fn protocol(&self) -> &Protocol { - self.inner.protocol() - } - - pub fn metadata(&self) -> &Metadata { - self.inner.metadata() - } - - pub fn table_properties(&self) -> &TableProperties { - &self.inner.table_properties() - } - - /// Get the timestamp of the given version in miliscends since epoch. - /// - /// Extracts the timestamp from the commit file of the given version - /// from the current log segment. If the commit file is not part of the - /// current log segment, `None` is returned. - pub fn version_timestamp(&self, version: Version) -> Option { - self.inner - ._log_segment() - .ascending_commit_files - .iter() - .find(|f| f.version == version) - .map(|f| f.location.last_modified) - } - - /// read all active files from the log - pub(crate) fn files( - &self, - predicate: Option>, - ) -> DeltaResult>> { - let scan = self - .inner - .clone() - .scan_builder() - .with_predicate(predicate) - .build()?; - Ok(scan.scan_data(self.engine.as_ref())?.map(|res| { - res.and_then(|(data, mut predicate)| { - let batch: RecordBatch = ArrowEngineData::try_from_engine_data(data)?.into(); - if predicate.len() < batch.num_rows() { - predicate - .extend(std::iter::repeat(true).take(batch.num_rows() - predicate.len())); - } - Ok(filter_record_batch(&batch, &BooleanArray::from(predicate))?) - }) - })) - } - - pub(crate) fn tombstones(&self) -> DeltaResult>> { - static META_PREDICATE: LazyLock> = LazyLock::new(|| { - Some(Arc::new( - Expression::column([REMOVE_NAME, "path"]).is_not_null(), - )) - }); - let read_schema = get_log_schema().project(&[REMOVE_NAME])?; - Ok(self - .inner - ._log_segment() - .replay( - self.engine.as_ref(), - read_schema.clone(), - read_schema, - META_PREDICATE.clone(), - )? - .map_ok(|(d, _)| Ok(RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?))) - .flatten()) - } - - /// Scan the Delta Log to obtain the latest transaction for all applications - /// - /// This method requires a full scan of the log to find all transactions. - /// When a specific application id is requested, it is much more efficient to use - /// [`application_transaction`](Self::application_transaction) instead. - pub fn application_transactions(&self) -> DeltaResult { - let scanner = SetTransactionScanner::new(self.inner.clone()); - Ok(scanner.application_transactions(self.engine.as_ref())?) - } - - /// Scan the Delta Log for the latest transaction entry for a specific application. - /// - /// Initiates a log scan, but terminates as soon as the transaction - /// for the given application is found. - pub fn application_transaction( - &self, - app_id: impl AsRef, - ) -> DeltaResult> { - let scanner = SetTransactionScanner::new(self.inner.clone()); - Ok(scanner.application_transaction(self.engine.as_ref(), app_id.as_ref())?) - } -} - -#[derive(Clone)] -pub struct EagerSnapshot { - snapshot: Snapshot, - files: Option, - predicate: Option>, -} - -impl EagerSnapshot { - /// Create a new [`EagerSnapshot`] instance tracking actions of the given types. - /// - /// Only actions supplied by `tracked_actions` will be loaded into memory. - /// This is useful when only a subset of actions are needed. `Add` and `Remove` actions - /// are treated specially. I.e. `Add` and `Remove` will be loaded as well. - pub async fn try_new_with_actions( - table_root: impl AsRef, - store: Arc, - config: DeltaTableConfig, - version: impl Into>, - tracked_actions: HashSet, - predicate: Option>, - ) -> DeltaResult { - let snapshot = Snapshot::try_new(Table::try_from_uri(table_root)?, store, version).await?; - let files = config - .require_files - .then(|| -> DeltaResult<_> { Ok(replay_file_actions(&snapshot)?) }) - .transpose()?; - Ok(Self { - snapshot, - files, - predicate, - }) - } - - pub fn version(&self) -> Version { - self.snapshot.version() - } - - pub fn schema(&self) -> &Schema { - self.snapshot.schema() - } - - pub fn protocol(&self) -> &Protocol { - self.snapshot.protocol() - } - - pub fn metadata(&self) -> &Metadata { - self.snapshot.metadata() - } - - pub fn table_properties(&self) -> &TableProperties { - &self.snapshot.table_properties() - } - - pub fn files(&self) -> DeltaResult> { - Ok(LogicalFileView { - files: self - .files - .clone() - .ok_or_else(|| SnapshotError::missing_data(ActionType::Add))?, - index: 0, - }) - } - - /// Get the number of files in the current snapshot - pub fn files_count(&self) -> DeltaResult { - Ok(self - .files - .as_ref() - .map(|f| f.num_rows()) - .ok_or_else(|| SnapshotError::missing_data(ActionType::Add))?) - } - - pub fn tombstones(&self) -> DeltaResult>> { - self.snapshot.tombstones() - } - - /// Scan the Delta Log to obtain the latest transaction for all applications - /// - /// This method requires a full scan of the log to find all transactions. - /// When a specific application id is requested, it is much more efficient to use - /// [`application_transaction`](Self::application_transaction) instead. - pub fn application_transactions(&self) -> DeltaResult { - self.snapshot.application_transactions() - } - - /// Scan the Delta Log for the latest transaction entry for a specific application. - /// - /// Initiates a log scan, but terminates as soon as the transaction - /// for the given application is found. - pub fn application_transaction( - &self, - app_id: impl AsRef, - ) -> DeltaResult> { - self.snapshot.application_transaction(app_id) - } - - pub(crate) fn update(&mut self) -> DeltaResult<()> { - let state = self - .files - .as_ref() - .ok_or(SnapshotError::missing_data(ActionType::Add))? - .clone(); - - let log_root = self.snapshot.table_root().join("_delta_log/").unwrap(); - let fs_client = self.snapshot.engine.get_file_system_client(); - let commit_read_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME])?; - let checkpoint_read_schema = get_log_add_schema().clone(); - - let segment = - LogSegment::for_table_changes(fs_client.as_ref(), log_root, self.version() + 1, None)?; - let slice_iter = segment - .replay( - self.snapshot.engine.as_ref(), - commit_read_schema, - checkpoint_read_schema, - None, - )? - .chain(std::iter::once(Ok(( - Box::new(ArrowEngineData::from(state)) as Box, - false, - )))); - - let res = scan_action_iter(self.snapshot.engine.as_ref(), slice_iter, None) - .map(|res| { - res.and_then(|(d, sel)| { - let batch = RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?); - Ok(filter_record_batch(&batch, &BooleanArray::from(sel))?) - }) - }) - .collect::, _>>()?; - - self.files = Some(concat_batches(res[0].schema_ref(), &res)?); - - Ok(()) - } -} - -fn replay_file_actions(snapshot: &Snapshot) -> DeltaResult { - let commit_read_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME])?; - let checkpoint_read_schema = get_log_add_schema().clone(); - - let curr_data = snapshot - .inner - ._log_segment() - .replay( - snapshot.engine.as_ref(), - commit_read_schema.clone(), - checkpoint_read_schema.clone(), - None, - )? - .map_ok( - |(data, flag)| -> Result<(RecordBatch, bool), delta_kernel::Error> { - Ok((ArrowEngineData::try_from_engine_data(data)?.into(), flag)) - }, - ) - .flatten() - .collect::, _>>()?; - - let scan_iter = curr_data.clone().into_iter().map(|(data, flag)| { - Ok(( - Box::new(ArrowEngineData::new(data.clone())) as Box, - flag, - )) - }); - - let res = scan_action_iter(snapshot.engine.as_ref(), scan_iter, None) - .map(|res| { - res.and_then(|(d, selection)| { - Ok(( - RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?), - selection, - )) - }) - }) - .zip(curr_data.into_iter()) - .map(|(scan_res, (data_raw, _))| match scan_res { - Ok((_, selection)) => { - let data = filter_record_batch(&data_raw, &BooleanArray::from(selection))?; - Ok(data.project(&[0])?) - } - Err(e) => Err(e), - }) - .collect::, _>>()?; - - Ok(concat_batches(res[0].schema_ref(), &res)?) -} - -/// Helper trait to extract individual values from a `StructData`. -pub trait StructDataExt { - fn get(&self, key: &str) -> Option<&Scalar>; -} - -impl StructDataExt for StructData { - fn get(&self, key: &str) -> Option<&Scalar> { - self.fields() - .iter() - .zip(self.values().iter()) - .find(|(k, _)| k.name() == key) - .map(|(_, v)| v) - } -} - -#[derive(Clone)] -pub struct LogicalFileView { - files: RecordBatch, - index: usize, -} - -impl LogicalFileView { - /// Path of the file. - pub fn path(&self) -> &str { - self.files.column(0).as_string::().value(self.index) - } - - /// Size of the file in bytes. - pub fn size(&self) -> i64 { - self.files - .column(1) - .as_primitive::() - .value(self.index) - } - - /// Modification time of the file in milliseconds since epoch. - pub fn modification_time(&self) -> i64 { - self.files - .column(2) - .as_primitive::() - .value(self.index) - } - - /// Datetime of the last modification time of the file. - pub fn modification_datetime(&self) -> DeltaResult> { - DateTime::from_timestamp_millis(self.modification_time()).ok_or(DeltaTableError::from( - crate::protocol::ProtocolError::InvalidField(format!( - "invalid modification_time: {:?}", - self.modification_time() - )), - )) - } - - pub fn stats(&self) -> Option<&str> { - let col = self.files.column(3).as_string::(); - col.is_valid(self.index).then(|| col.value(self.index)) - } - - pub fn partition_values(&self) -> Option { - self.files - .column_by_name("fileConstantValues") - .and_then(|col| col.as_struct_opt()) - .and_then(|s| s.column_by_name("partitionValues")) - .and_then(|arr| { - arr.is_valid(self.index) - .then(|| match Scalar::from_array(arr, self.index) { - Some(Scalar::Struct(s)) => Some(s), - _ => None, - }) - .flatten() - }) - } -} - -impl Iterator for LogicalFileView { - type Item = LogicalFileView; - - fn next(&mut self) -> Option { - if self.index < self.files.num_rows() { - let file = LogicalFileView { - files: self.files.clone(), - index: self.index, - }; - self.index += 1; - Some(file) - } else { - None - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - use deltalake_test::acceptance::{read_dat_case, TestCaseInfo}; - use deltalake_test::TestResult; - use std::path::PathBuf; - - fn get_dat_dir() -> PathBuf { - let d = PathBuf::from(env!("CARGO_MANIFEST_DIR")); - let mut rep_root = d - .parent() - .and_then(|p| p.parent()) - .expect("valid directory") - .to_path_buf(); - rep_root.push("dat/out/reader_tests/generated"); - rep_root - } - - async fn load_snapshot() -> TestResult<()> { - // some comment - let mut dat_dir = get_dat_dir(); - dat_dir.push("multi_partitioned"); - - let dat_info: TestCaseInfo = read_dat_case(dat_dir)?; - let table_info = dat_info.table_summary()?; - - let table = Table::try_from_uri(dat_info.table_root()?)?; - - let snapshot = Snapshot::try_new( - table, - Arc::new(object_store::local::LocalFileSystem::default()), - None, - ) - .await?; - - assert_eq!(snapshot.version(), table_info.version); - assert_eq!( - ( - snapshot.protocol().min_reader_version(), - snapshot.protocol().min_writer_version() - ), - (table_info.min_reader_version, table_info.min_writer_version) - ); - - Ok(()) - } - - #[tokio::test(flavor = "multi_thread")] - async fn load_snapshot_multi() -> TestResult<()> { - load_snapshot().await - } - - #[tokio::test(flavor = "current_thread")] - async fn load_snapshot_current() -> TestResult<()> { - load_snapshot().await - } - - #[tokio::test] - async fn load_eager_snapshot() -> TestResult<()> { - let mut dat_dir = get_dat_dir(); - dat_dir.push("multi_partitioned"); - - let dat_info: TestCaseInfo = read_dat_case(dat_dir)?; - let table_info = dat_info.table_summary()?; - - let table = Table::try_from_uri(dat_info.table_root()?)?; - - let mut snapshot = EagerSnapshot::try_new_with_actions( - table.location(), - Arc::new(object_store::local::LocalFileSystem::default()), - Default::default(), - Some(1), - Default::default(), - None, - ) - .await?; - - // assert_eq!(snapshot.version(), table_info.version); - // assert_eq!( - // snapshot.protocol().min_reader_version(), - // table_info.min_reader_version - // ); - - snapshot.update()?; - - Ok(()) - } -} diff --git a/crates/core/src/storage/cache.rs b/crates/core/src/kernel/snapshot_next/cache.rs similarity index 95% rename from crates/core/src/storage/cache.rs rename to crates/core/src/kernel/snapshot_next/cache.rs index eb6b5bd785..594d599942 100644 --- a/crates/core/src/storage/cache.rs +++ b/crates/core/src/kernel/snapshot_next/cache.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use bytes::Bytes; use chrono::{DateTime, Utc}; use futures::stream::BoxStream; -use futures::{StreamExt, TryStreamExt}; +use futures::StreamExt; use object_store::path::Path; use object_store::{ Attributes, GetOptions, GetResult, GetResultPayload, ListResult, MultipartUpload, ObjectMeta, @@ -46,11 +46,10 @@ impl Entry { /// the object store are immutable and no attempt is made to invalidate the cache /// when files are updated in the remote object store. #[derive(Clone)] -pub(crate) struct CommitCacheObjectStore { +pub(super) struct CommitCacheObjectStore { inner: Arc, check: Arc bool + Send + Sync>, cache: Arc>, - has_ordered_listing: bool, } impl std::fmt::Debug for CommitCacheObjectStore { @@ -75,13 +74,10 @@ fn cache_json(path: &Path) -> bool { impl CommitCacheObjectStore { /// Create a new conditionally cached object store. pub fn new(inner: Arc) -> Self { - let store_str = format!("{}", inner); - let is_local = store_str.starts_with("LocalFileSystem"); Self { inner, check: Arc::new(cache_json), cache: Arc::new(Cache::new(100)), - has_ordered_listing: !is_local, } } diff --git a/crates/core/src/kernel/snapshot_next/eager.rs b/crates/core/src/kernel/snapshot_next/eager.rs new file mode 100644 index 0000000000..88306b8e49 --- /dev/null +++ b/crates/core/src/kernel/snapshot_next/eager.rs @@ -0,0 +1,263 @@ +use std::sync::Arc; + +use arrow::compute::{concat_batches, filter_record_batch}; +use arrow_array::{BooleanArray, RecordBatch}; +use chrono::format::Item; +use delta_kernel::actions::set_transaction::SetTransactionMap; +use delta_kernel::actions::{get_log_add_schema, get_log_schema, ADD_NAME, REMOVE_NAME}; +use delta_kernel::actions::{Add, Metadata, Protocol, SetTransaction}; +use delta_kernel::engine::arrow_data::ArrowEngineData; +use delta_kernel::log_segment::LogSegment; +use delta_kernel::scan::log_replay::scan_action_iter; +use delta_kernel::schema::Schema; +use delta_kernel::table_properties::TableProperties; +use delta_kernel::{EngineData, Expression, Table, Version}; +use itertools::Itertools; +use object_store::ObjectStore; +use url::Url; + +use super::iterators::{AddIterator, AddView, AddViewItem}; +use super::lazy::LazySnapshot; +use super::{Snapshot, SnapshotError}; +use crate::{DeltaResult, DeltaTableConfig, DeltaTableError}; + +/// An eager snapshot of a Delta Table at a specific version. +/// +/// This snapshot loads some log data eagerly and keeps it in memory. +#[derive(Clone)] +pub struct EagerSnapshot { + snapshot: LazySnapshot, + files: Option, + predicate: Option>, +} + +impl Snapshot for EagerSnapshot { + fn table_root(&self) -> &Url { + self.snapshot.table_root() + } + + fn version(&self) -> Version { + self.snapshot.version() + } + + fn schema(&self) -> &Schema { + self.snapshot.schema() + } + + fn protocol(&self) -> &Protocol { + self.snapshot.protocol() + } + + fn metadata(&self) -> &Metadata { + self.snapshot.metadata() + } + + fn table_properties(&self) -> &TableProperties { + self.snapshot.table_properties() + } + + fn files(&self) -> DeltaResult>> { + Ok(std::iter::once(Ok(self + .files + .clone() + .ok_or(SnapshotError::FilesNotInitialized)?))) + } + + fn tombstones(&self) -> DeltaResult>> { + self.snapshot.tombstones() + } + + fn application_transactions(&self) -> DeltaResult { + self.snapshot.application_transactions() + } + + fn application_transaction( + &self, + app_id: impl AsRef, + ) -> DeltaResult> { + self.snapshot.application_transaction(app_id) + } +} + +impl EagerSnapshot { + /// Create a new [`EagerSnapshot`] instance + pub async fn try_new( + table_root: impl AsRef, + store: Arc, + config: DeltaTableConfig, + version: impl Into>, + predicate: impl Into>>, + ) -> DeltaResult { + let snapshot = + LazySnapshot::try_new(Table::try_from_uri(table_root)?, store, version).await?; + let files = config + .require_files + .then(|| -> DeltaResult<_> { Ok(replay_file_actions(&snapshot)?) }) + .transpose()?; + Ok(Self { + snapshot, + files, + predicate: predicate.into(), + }) + } + + pub fn file_data(&self) -> DeltaResult<&RecordBatch> { + Ok(self + .files + .as_ref() + .ok_or(SnapshotError::FilesNotInitialized)?) + } + + pub fn files(&self) -> DeltaResult> { + AddView::try_new(self.file_data()?.clone()) + } + + pub fn file_actions(&self) -> DeltaResult> + '_> { + AddIterator::try_new(self.file_data()?) + } + + /// Get the number of files in the current snapshot + pub fn files_count(&self) -> DeltaResult { + Ok(self + .files + .as_ref() + .map(|f| f.num_rows()) + .ok_or_else(|| SnapshotError::FilesNotInitialized)?) + } + + pub(crate) fn update(&mut self) -> DeltaResult<()> { + let log_root = self.snapshot.table_root().join("_delta_log/").unwrap(); + let fs_client = self.snapshot.engine_ref().get_file_system_client(); + let commit_read_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME])?; + let checkpoint_read_schema = get_log_add_schema().clone(); + + let segment = + LogSegment::for_table_changes(fs_client.as_ref(), log_root, self.version() + 1, None)?; + let mut slice_iter = segment + .replay( + self.snapshot.engine_ref().as_ref(), + commit_read_schema, + checkpoint_read_schema, + None, + )? + .map_ok( + |(data, flag)| -> Result<(RecordBatch, bool), delta_kernel::Error> { + Ok((ArrowEngineData::try_from_engine_data(data)?.into(), flag)) + }, + ) + .flatten() + .collect::, _>>()?; + + slice_iter.push(( + self.files + .as_ref() + .ok_or(SnapshotError::FilesNotInitialized)? + .clone(), + false, + )); + + self.files = Some(scan_as_log_data(&self.snapshot, slice_iter)?); + + Ok(()) + } +} + +fn replay_file_actions(snapshot: &LazySnapshot) -> DeltaResult { + let commit_read_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME])?; + let checkpoint_read_schema = get_log_add_schema().clone(); + + let curr_data = snapshot + .inner + ._log_segment() + .replay( + snapshot.engine_ref().as_ref(), + commit_read_schema.clone(), + checkpoint_read_schema.clone(), + None, + )? + .map_ok( + |(data, flag)| -> Result<(RecordBatch, bool), delta_kernel::Error> { + Ok((ArrowEngineData::try_from_engine_data(data)?.into(), flag)) + }, + ) + .flatten() + .collect::, _>>()?; + + scan_as_log_data(snapshot, curr_data) +} + +fn scan_as_log_data( + snapshot: &LazySnapshot, + curr_data: Vec<(RecordBatch, bool)>, +) -> Result { + let scan_iter = curr_data.clone().into_iter().map(|(data, flag)| { + Ok(( + Box::new(ArrowEngineData::new(data.clone())) as Box, + flag, + )) + }); + + let res = scan_action_iter(snapshot.engine_ref().as_ref(), scan_iter, None) + .map(|res| { + res.and_then(|(d, selection)| { + Ok(( + RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?), + selection, + )) + }) + }) + .zip(curr_data.into_iter()) + .map(|(scan_res, (data_raw, _))| match scan_res { + Ok((_, selection)) => { + let data = filter_record_batch(&data_raw, &BooleanArray::from(selection))?; + Ok(data.project(&[0])?) + } + Err(e) => Err(e), + }) + .collect::, _>>()?; + + Ok(concat_batches(res[0].schema_ref(), &res)?) +} + +#[cfg(test)] +mod tests { + use deltalake_test::acceptance::{read_dat_case, TestCaseInfo}; + use deltalake_test::TestResult; + + use super::super::tests::get_dat_dir; + use super::*; + + #[tokio::test] + async fn load_eager_snapshot() -> TestResult<()> { + let mut dat_dir = get_dat_dir(); + dat_dir.push("multi_partitioned"); + + let dat_info: TestCaseInfo = read_dat_case(dat_dir)?; + let table_info = dat_info.table_summary()?; + + let table = Table::try_from_uri(dat_info.table_root()?)?; + + let mut snapshot = EagerSnapshot::try_new( + table.location(), + Arc::new(object_store::local::LocalFileSystem::default()), + Default::default(), + Some(1), + None, + ) + .await?; + + // assert_eq!(snapshot.version(), table_info.version); + // assert_eq!( + // snapshot.protocol().min_reader_version(), + // table_info.min_reader_version + // ); + + snapshot.update()?; + + for file in snapshot.file_actions()? { + println!("file: {:#?}", file.unwrap()); + } + + Ok(()) + } +} diff --git a/crates/core/src/kernel/snapshot_next/iterators.rs b/crates/core/src/kernel/snapshot_next/iterators.rs new file mode 100644 index 0000000000..4700cb9da3 --- /dev/null +++ b/crates/core/src/kernel/snapshot_next/iterators.rs @@ -0,0 +1,310 @@ +use std::collections::HashSet; +use std::sync::Arc; + +use arrow_array::cast::AsArray; +use arrow_array::types::Int64Type; +use arrow_array::{ + Array, ArrayRef, BooleanArray, Int64Array, RecordBatch, StringArray, StructArray, +}; +use chrono::{DateTime, Utc}; +use delta_kernel::actions::visitors::AddVisitor; +use delta_kernel::actions::Add; +use delta_kernel::actions::ADD_NAME; +use delta_kernel::engine::arrow_data::ArrowEngineData; +use delta_kernel::engine::arrow_expression::ProvidesColumnByName; +use delta_kernel::engine_data::{GetData, RowVisitor}; +use delta_kernel::expressions::{Scalar, StructData}; + +use crate::kernel::scalars::ScalarExt; +use crate::{DeltaResult, DeltaTableError}; + +pub struct AddIterator<'a> { + paths: &'a StringArray, + getters: Arc>>, + index: usize, +} + +impl AddIterator<'_> { + pub fn try_new<'a>(actions: &'a RecordBatch) -> DeltaResult> { + validate_column::(actions, &[ADD_NAME, "path"])?; + validate_column::(actions, &[ADD_NAME, "size"])?; + validate_column::(actions, &[ADD_NAME, "modificationTime"])?; + validate_column::(actions, &[ADD_NAME, "dataChange"])?; + + let visitor = AddVisitor::new(); + let fields = visitor.selected_column_names_and_types(); + + let mut mask = HashSet::new(); + for column in fields.0 { + for i in 0..column.len() { + mask.insert(&column[..i + 1]); + } + } + + let mut getters = vec![]; + ArrowEngineData::extract_columns(&mut vec![], &mut getters, fields.1, &mask, actions)?; + + let paths = extract_column(actions, &[ADD_NAME, "path"])?.as_string::(); + + Ok(AddIterator { + paths, + getters: Arc::new(getters), + index: 0, + }) + } +} + +impl Iterator for AddIterator<'_> { + type Item = DeltaResult; + + fn next(&mut self) -> Option { + if self.index < self.paths.len() { + let path = self.paths.value(self.index).to_string(); + let add = AddVisitor::visit_add(self.index, path, self.getters.as_slice()) + .map_err(DeltaTableError::from); + self.index += 1; + Some(add) + } else { + None + } + } +} + +pub struct AddView { + actions: RecordBatch, + index: usize, +} + +impl AddView { + pub fn try_new(actions: RecordBatch) -> DeltaResult { + validate_column::(&actions, &[ADD_NAME, "path"])?; + validate_column::(&actions, &[ADD_NAME, "size"])?; + validate_column::(&actions, &[ADD_NAME, "modificationTime"])?; + validate_column::(&actions, &[ADD_NAME, "dataChange"])?; + Ok(Self { actions, index: 0 }) + } +} + +impl Iterator for AddView { + type Item = AddViewItem; + + fn next(&mut self) -> Option { + if self.index < self.actions.num_rows() { + let add = AddViewItem { + actions: self.actions.clone(), + index: self.index, + }; + self.index += 1; + Some(add) + } else { + None + } + } +} + +pub struct AddViewItem { + actions: RecordBatch, + index: usize, +} + +impl AddViewItem { + pub fn path(&self) -> &str { + extract_column(&self.actions, &[ADD_NAME, "path"]) + .unwrap() + .as_string::() + .value(self.index) + } + + pub fn size(&self) -> i64 { + extract_column(&self.actions, &[ADD_NAME, "size"]) + .unwrap() + .as_primitive::() + .value(self.index) + } + + pub fn modification_time(&self) -> i64 { + extract_column(&self.actions, &[ADD_NAME, "modificationTime"]) + .unwrap() + .as_primitive::() + .value(self.index) + } + + /// Datetime of the last modification time of the file. + pub fn modification_datetime(&self) -> DeltaResult> { + DateTime::from_timestamp_millis(self.modification_time()).ok_or(DeltaTableError::from( + crate::protocol::ProtocolError::InvalidField(format!( + "invalid modification_time: {:?}", + self.modification_time() + )), + )) + } + + pub fn data_change(&self) -> bool { + extract_column(&self.actions, &[ADD_NAME, "dataChange"]) + .unwrap() + .as_boolean() + .value(self.index) + } + + pub fn stats(&self) -> Option<&str> { + extract_column(&self.actions, &[ADD_NAME, "stats"]) + .ok() + .and_then(|c| c.as_string_opt::().map(|v| v.value(self.index))) + } + + pub fn base_row_id(&self) -> Option { + extract_column(&self.actions, &[ADD_NAME, "baseRowId"]) + .ok() + .and_then(|c| { + c.as_primitive_opt::() + .map(|v| v.value(self.index)) + }) + } + + pub fn default_row_commit_version(&self) -> Option { + extract_column(&self.actions, &[ADD_NAME, "defaultRowCommitVersion"]) + .ok() + .and_then(|c| { + c.as_primitive_opt::() + .map(|v| v.value(self.index)) + }) + } + + pub fn clustering_provider(&self) -> Option<&str> { + extract_column(&self.actions, &[ADD_NAME, "clusteringProvider"]) + .ok() + .and_then(|c| c.as_string_opt::().map(|v| v.value(self.index))) + } +} + +#[derive(Clone)] +pub struct LogicalFileView { + files: RecordBatch, + index: usize, +} + +impl LogicalFileView { + /// Path of the file. + pub fn path(&self) -> &str { + self.files.column(0).as_string::().value(self.index) + } + + /// Size of the file in bytes. + pub fn size(&self) -> i64 { + self.files + .column(1) + .as_primitive::() + .value(self.index) + } + + /// Modification time of the file in milliseconds since epoch. + pub fn modification_time(&self) -> i64 { + self.files + .column(2) + .as_primitive::() + .value(self.index) + } + + /// Datetime of the last modification time of the file. + pub fn modification_datetime(&self) -> DeltaResult> { + DateTime::from_timestamp_millis(self.modification_time()).ok_or(DeltaTableError::from( + crate::protocol::ProtocolError::InvalidField(format!( + "invalid modification_time: {:?}", + self.modification_time() + )), + )) + } + + pub fn stats(&self) -> Option<&str> { + let col = self.files.column(3).as_string::(); + col.is_valid(self.index).then(|| col.value(self.index)) + } + + pub fn partition_values(&self) -> Option { + self.files + .column_by_name("fileConstantValues") + .and_then(|col| col.as_struct_opt()) + .and_then(|s| s.column_by_name("partitionValues")) + .and_then(|arr| { + arr.is_valid(self.index) + .then(|| match Scalar::from_array(arr, self.index) { + Some(Scalar::Struct(s)) => Some(s), + _ => None, + }) + .flatten() + }) + } +} + +impl Iterator for LogicalFileView { + type Item = LogicalFileView; + + fn next(&mut self) -> Option { + if self.index < self.files.num_rows() { + let file = LogicalFileView { + files: self.files.clone(), + index: self.index, + }; + self.index += 1; + Some(file) + } else { + None + } + } +} + +fn validate_column<'a, T: Array + 'static>( + actions: &'a RecordBatch, + col: &'a [impl AsRef], +) -> DeltaResult<()> { + if let Ok(arr) = extract_column(actions, col) { + if arr.as_any().downcast_ref::().is_none() { + return Err(DeltaTableError::from( + crate::protocol::ProtocolError::InvalidField(format!("Invalid column: {:?}", arr)), + )); + } + if arr.null_count() > 0 { + return Err(DeltaTableError::from( + crate::protocol::ProtocolError::InvalidField(format!( + "Column has null values: {:?}", + arr + )), + )); + } + } else { + return Err(DeltaTableError::from( + crate::protocol::ProtocolError::InvalidField(format!("Column not found",)), + )); + } + Ok(()) +} + +fn extract_column<'a>( + mut parent: &'a dyn ProvidesColumnByName, + col: &[impl AsRef], +) -> DeltaResult<&'a ArrayRef> { + let mut field_names = col.iter(); + let Some(mut field_name) = field_names.next() else { + return Err(arrow_schema::ArrowError::SchemaError( + "Empty column path".to_string(), + ))?; + }; + loop { + let child = parent.column_by_name(field_name.as_ref()).ok_or_else(|| { + arrow_schema::ArrowError::SchemaError(format!("No such field: {}", field_name.as_ref())) + })?; + field_name = match field_names.next() { + Some(name) => name, + None => return Ok(child), + }; + parent = child + .as_any() + .downcast_ref::() + .ok_or_else(|| { + arrow_schema::ArrowError::SchemaError(format!( + "Not a struct: {}", + field_name.as_ref() + )) + })?; + } +} diff --git a/crates/core/src/kernel/snapshot_next/lazy.rs b/crates/core/src/kernel/snapshot_next/lazy.rs new file mode 100644 index 0000000000..386d0f63d6 --- /dev/null +++ b/crates/core/src/kernel/snapshot_next/lazy.rs @@ -0,0 +1,229 @@ +//! Snapshot of a Delta Table at a specific version. +//! +use std::sync::{Arc, LazyLock}; + +use arrow::compute::filter_record_batch; +use arrow_array::{BooleanArray, RecordBatch}; +use delta_kernel::actions::set_transaction::{SetTransactionMap, SetTransactionScanner}; +use delta_kernel::actions::{get_log_schema, REMOVE_NAME}; +use delta_kernel::actions::{Metadata, Protocol, SetTransaction}; +use delta_kernel::engine::arrow_data::ArrowEngineData; +use delta_kernel::engine::default::executor::tokio::{ + TokioBackgroundExecutor, TokioMultiThreadExecutor, +}; +use delta_kernel::engine::default::DefaultEngine; +use delta_kernel::schema::Schema; +use delta_kernel::snapshot::Snapshot as SnapshotInner; +use delta_kernel::table_properties::TableProperties; +use delta_kernel::{Engine, Expression, ExpressionRef, Table, Version}; +use itertools::Itertools; +use object_store::path::Path; +use object_store::ObjectStore; +use url::Url; + +use super::cache::CommitCacheObjectStore; +use super::Snapshot; +use crate::{DeltaResult, DeltaTableError}; + +// TODO: avoid repetitive parsing of json stats + +#[derive(Clone)] +pub struct LazySnapshot { + pub(super) inner: Arc, + engine: Arc, +} + +impl Snapshot for LazySnapshot { + fn table_root(&self) -> &Url { + &self.inner.table_root() + } + + fn version(&self) -> Version { + self.inner.version() + } + + fn schema(&self) -> &Schema { + self.inner.schema() + } + + fn protocol(&self) -> &Protocol { + self.inner.protocol() + } + + fn metadata(&self) -> &Metadata { + self.inner.metadata() + } + + fn table_properties(&self) -> &TableProperties { + &self.inner.table_properties() + } + + fn files(&self) -> DeltaResult>> { + Ok(self + .files_impl(None)? + .map(|batch| batch.map_err(|e| e.into()))) + } + + fn tombstones(&self) -> DeltaResult>> { + static META_PREDICATE: LazyLock> = LazyLock::new(|| { + Some(Arc::new( + Expression::column([REMOVE_NAME, "path"]).is_not_null(), + )) + }); + let read_schema = get_log_schema().project(&[REMOVE_NAME])?; + Ok(self + .inner + ._log_segment() + .replay( + self.engine.as_ref(), + read_schema.clone(), + read_schema, + META_PREDICATE.clone(), + )? + .map_ok(|(d, _)| Ok(RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?))) + .flatten()) + } + + fn application_transactions(&self) -> DeltaResult { + let scanner = SetTransactionScanner::new(self.inner.clone()); + Ok(scanner.application_transactions(self.engine.as_ref())?) + } + + fn application_transaction( + &self, + app_id: impl AsRef, + ) -> DeltaResult> { + let scanner = SetTransactionScanner::new(self.inner.clone()); + Ok(scanner.application_transaction(self.engine.as_ref(), app_id.as_ref())?) + } +} + +impl LazySnapshot { + /// Create a new [`Snapshot`] instance. + pub fn new(inner: Arc, engine: Arc) -> Self { + Self { inner, engine } + } + + /// Create a new [`Snapshot`] instance for a table. + pub async fn try_new( + table: Table, + store: Arc, + version: impl Into>, + ) -> DeltaResult { + // TODO: how to deal with the dedicated IO runtime? Would this already be covered by the + // object store implementation pass to this? + let table_root = Path::from_url_path(table.location().path())?; + let store_str = format!("{}", store); + let is_local = store_str.starts_with("LocalFileSystem"); + let store = Arc::new(CommitCacheObjectStore::new(store)); + let handle = tokio::runtime::Handle::current(); + let engine: Arc = match handle.runtime_flavor() { + tokio::runtime::RuntimeFlavor::MultiThread => Arc::new(DefaultEngine::new_with_opts( + store, + table_root, + Arc::new(TokioMultiThreadExecutor::new(handle)), + !is_local, + )), + tokio::runtime::RuntimeFlavor::CurrentThread => Arc::new(DefaultEngine::new_with_opts( + store, + table_root, + Arc::new(TokioBackgroundExecutor::new()), + !is_local, + )), + _ => return Err(DeltaTableError::generic("unsupported runtime flavor")), + }; + + let snapshot = table.snapshot(engine.as_ref(), version.into())?; + Ok(Self::new(Arc::new(snapshot), engine)) + } + + /// A shared reference to the engine used for interacting with the Delta Table. + pub(super) fn engine_ref(&self) -> &Arc { + &self.engine + } + + /// Get the timestamp of the given version in miliscends since epoch. + /// + /// Extracts the timestamp from the commit file of the given version + /// from the current log segment. If the commit file is not part of the + /// current log segment, `None` is returned. + pub fn version_timestamp(&self, version: Version) -> Option { + self.inner + ._log_segment() + .ascending_commit_files + .iter() + .find(|f| f.version == version) + .map(|f| f.location.last_modified) + } + + /// read all active files from the log + fn files_impl( + &self, + predicate: impl Into>>, + ) -> DeltaResult>> { + let scan = self + .inner + .clone() + .scan_builder() + .with_predicate(predicate) + .build()?; + Ok(scan.scan_data(self.engine.as_ref())?.map(|res| { + res.and_then(|(data, mut predicate)| { + let batch: RecordBatch = ArrowEngineData::try_from_engine_data(data)?.into(); + if predicate.len() < batch.num_rows() { + predicate + .extend(std::iter::repeat(true).take(batch.num_rows() - predicate.len())); + } + Ok(filter_record_batch(&batch, &BooleanArray::from(predicate))?) + }) + })) + } +} + +#[cfg(test)] +mod tests { + use deltalake_test::acceptance::{read_dat_case, TestCaseInfo}; + use deltalake_test::TestResult; + + use super::super::tests::get_dat_dir; + use super::*; + + async fn load_snapshot() -> TestResult<()> { + // some comment + let mut dat_dir = get_dat_dir(); + dat_dir.push("multi_partitioned"); + + let dat_info: TestCaseInfo = read_dat_case(dat_dir)?; + let table_info = dat_info.table_summary()?; + + let table = Table::try_from_uri(dat_info.table_root()?)?; + + let snapshot = LazySnapshot::try_new( + table, + Arc::new(object_store::local::LocalFileSystem::default()), + None, + ) + .await?; + + assert_eq!(snapshot.version(), table_info.version); + assert_eq!( + ( + snapshot.protocol().min_reader_version(), + snapshot.protocol().min_writer_version() + ), + (table_info.min_reader_version, table_info.min_writer_version) + ); + + Ok(()) + } + + #[tokio::test(flavor = "multi_thread")] + async fn load_snapshot_multi() -> TestResult<()> { + load_snapshot().await + } + + #[tokio::test(flavor = "current_thread")] + async fn load_snapshot_current() -> TestResult<()> { + load_snapshot().await + } +} diff --git a/crates/core/src/kernel/snapshot_next/mod.rs b/crates/core/src/kernel/snapshot_next/mod.rs new file mode 100644 index 0000000000..879ef2824a --- /dev/null +++ b/crates/core/src/kernel/snapshot_next/mod.rs @@ -0,0 +1,161 @@ +//! Snapshot of a Delta Table at a specific version. +//! +use std::sync::Arc; + +use arrow_array::RecordBatch; +use delta_kernel::actions::visitors::SetTransactionMap; +use delta_kernel::actions::{Add, Metadata, Protocol, SetTransaction}; +use delta_kernel::expressions::{Scalar, StructData}; +use delta_kernel::schema::Schema; +use delta_kernel::table_properties::TableProperties; +use delta_kernel::Version; +use iterators::{AddIterator, AddView, AddViewItem}; +use url::Url; + +use crate::{DeltaResult, DeltaTableError}; + +pub use eager::EagerSnapshot; +pub use lazy::LazySnapshot; + +mod cache; +mod eager; +mod iterators; +mod lazy; + +// TODO: avoid repetitive parsing of json stats + +#[derive(thiserror::Error, Debug)] +enum SnapshotError { + #[error("Tried accessing file data at snapshot initialized with no files.")] + FilesNotInitialized, +} + +impl From for DeltaTableError { + fn from(e: SnapshotError) -> Self { + match &e { + SnapshotError::FilesNotInitialized => DeltaTableError::generic(e), + } + } +} + +/// Helper trait to extract individual values from a `StructData`. +pub trait StructDataExt { + fn get(&self, key: &str) -> Option<&Scalar>; +} + +impl StructDataExt for StructData { + fn get(&self, key: &str) -> Option<&Scalar> { + self.fields() + .iter() + .zip(self.values().iter()) + .find(|(k, _)| k.name() == key) + .map(|(_, v)| v) + } +} + +pub trait Snapshot { + /// Location where the Delta Table (metadata) is stored. + fn table_root(&self) -> &Url; + + /// Version of this `Snapshot` in the table. + fn version(&self) -> Version; + + /// Table [`Schema`] at this `Snapshot`s version. + fn schema(&self) -> &Schema; + + /// Table [`Metadata`] at this `Snapshot`s version. + fn metadata(&self) -> &Metadata; + + /// Table [`Protocol`] at this `Snapshot`s version. + fn protocol(&self) -> &Protocol; + + /// Get the [`TableProperties`] for this [`Snapshot`]. + fn table_properties(&self) -> &TableProperties; + + fn files(&self) -> DeltaResult>>; + + fn files_view( + &self, + ) -> DeltaResult>>> { + Ok(self.files()?.map(|r| r.and_then(|b| AddView::try_new(b)))) + } + + fn tombstones(&self) -> DeltaResult>>; + + /// Scan the Delta Log to obtain the latest transaction for all applications + /// + /// This method requires a full scan of the log to find all transactions. + /// When a specific application id is requested, it is much more efficient to use + /// [`application_transaction`](Self::application_transaction) instead. + fn application_transactions(&self) -> DeltaResult; + + /// Scan the Delta Log for the latest transaction entry for a specific application. + /// + /// Initiates a log scan, but terminates as soon as the transaction + /// for the given application is found. + fn application_transaction( + &self, + app_id: impl AsRef, + ) -> DeltaResult>; +} + +impl Snapshot for Arc { + fn table_root(&self) -> &Url { + self.as_ref().table_root() + } + + fn version(&self) -> Version { + self.as_ref().version() + } + + fn schema(&self) -> &Schema { + self.as_ref().schema() + } + + fn metadata(&self) -> &Metadata { + self.as_ref().metadata() + } + + fn protocol(&self) -> &Protocol { + self.as_ref().protocol() + } + + fn table_properties(&self) -> &TableProperties { + self.as_ref().table_properties() + } + + fn files(&self) -> DeltaResult>> { + self.as_ref().files() + } + + fn tombstones(&self) -> DeltaResult>> { + self.as_ref().tombstones() + } + + fn application_transactions(&self) -> DeltaResult { + self.as_ref().application_transactions() + } + + fn application_transaction( + &self, + app_id: impl AsRef, + ) -> DeltaResult> { + self.as_ref().application_transaction(app_id) + } +} + +#[cfg(test)] +mod tests { + use std::path::PathBuf; + + pub(super) fn get_dat_dir() -> PathBuf { + let d = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + let mut rep_root = d + .parent() + .and_then(|p| p.parent()) + .expect("valid directory") + .to_path_buf(); + rep_root.push("dat/out/reader_tests/generated"); + rep_root + } +} diff --git a/crates/core/src/storage/mod.rs b/crates/core/src/storage/mod.rs index 31f4f60c77..8361bf138e 100644 --- a/crates/core/src/storage/mod.rs +++ b/crates/core/src/storage/mod.rs @@ -30,7 +30,6 @@ pub use retry_ext::ObjectStoreRetryExt; use std::ops::Range; pub use utils::*; -pub(crate) mod cache; pub mod file; pub mod retry_ext; pub mod utils; From b0f794f85379064be8412a2ea2f0f67107ac5215 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Fri, 17 Jan 2025 00:13:49 +0100 Subject: [PATCH 07/23] test: run some more dat tests Signed-off-by: Robert Pack --- .../{load_dat => load-dat}/action.yaml | 6 +- .github/workflows/build.yml | 17 ++-- Cargo.toml | 14 +-- crates/core/Cargo.toml | 6 +- crates/core/tests/dat.rs | 99 +++++++++++++++++++ 5 files changed, 125 insertions(+), 17 deletions(-) rename .github/actions/{load_dat => load-dat}/action.yaml (81%) create mode 100644 crates/core/tests/dat.rs diff --git a/.github/actions/load_dat/action.yaml b/.github/actions/load-dat/action.yaml similarity index 81% rename from .github/actions/load_dat/action.yaml rename to .github/actions/load-dat/action.yaml index 071db58ba0..6d40707b3c 100644 --- a/.github/actions/load_dat/action.yaml +++ b/.github/actions/load-dat/action.yaml @@ -19,8 +19,8 @@ runs: - name: load DAT shell: bash run: | - rm -rf {{ inputs.target-directory }} + rm -rf ${{ inputs.target-directory }} curl -OL https://github.com/delta-incubator/dat/releases/download/v${{ inputs.version }}/deltalake-dat-v${{ inputs.version }}.tar.gz - mkdir -p {{ inputs.target-directory }} - tar --no-same-permissions -xzf deltalake-dat-v${{ inputs.version }}.tar.gz --directory {{ inputs.target-directory }} + mkdir -p ${{ inputs.target-directory }} + tar --no-same-permissions -xzf deltalake-dat-v${{ inputs.version }}.tar.gz --directory ${{ inputs.target-directory }} rm deltalake-dat-v${{ inputs.version }}.tar.gz diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 578ae305ea..823d10ff0c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -20,7 +20,7 @@ jobs: uses: actions-rs/toolchain@v1 with: profile: default - toolchain: '1.81' + toolchain: "1.81" override: true - name: Format @@ -42,7 +42,7 @@ jobs: uses: actions-rs/toolchain@v1 with: profile: default - toolchain: '1.81' + toolchain: "1.81" override: true - name: build and lint with clippy @@ -79,9 +79,12 @@ jobs: uses: actions-rs/toolchain@v1 with: profile: default - toolchain: '1.81' + toolchain: "1.81" override: true + - name: Load DAT data + uses: ./.github/actions/load-dat + - name: Run tests run: cargo test --verbose --features ${{ env.DEFAULT_FEATURES }} @@ -114,7 +117,7 @@ jobs: uses: actions-rs/toolchain@v1 with: profile: default - toolchain: '1.81' + toolchain: "1.81" override: true # Install Java and Hadoop for HDFS integration tests @@ -129,6 +132,9 @@ jobs: tar -xf hadoop-3.4.0.tar.gz -C $GITHUB_WORKSPACE echo "$GITHUB_WORKSPACE/hadoop-3.4.0/bin" >> $GITHUB_PATH + - name: Load DAT data + uses: ./.github/actions/load-dat + - name: Start emulated services run: docker compose up -d @@ -160,7 +166,7 @@ jobs: uses: actions-rs/toolchain@v1 with: profile: default - toolchain: '1.81' + toolchain: "1.81" override: true - name: Download Lakectl @@ -175,4 +181,3 @@ jobs: - name: Run tests with rustls (default) run: | cargo test --features integration_test_lakefs,lakefs,datafusion - diff --git a/Cargo.toml b/Cargo.toml index c1bc6ea502..c3e53c69af 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,15 +26,15 @@ debug = true debug = "line-tables-only" [workspace.dependencies] -#delta_kernel = { version = "=0.6.0", features = ["default-engine"] } -delta_kernel = { path = "../delta-kernel-rs/kernel", features = [ - "default-engine", - "developer-visibility", -] } -# delta_kernel = { git = "https://github.com/roeap/delta-kernel-rs", rev = "fcc43b50dafdc5e6b84c206492bbde8ed1115529", features = [ +# delta_kernel = { version = "=0.6.0", features = ["default-engine"] } +# delta_kernel = { path = "../delta-kernel-rs/kernel", features = [ # "default-engine", # "developer-visibility", # ] } +delta_kernel = { git = "https://github.com/roeap/delta-kernel-rs", rev = "caeb70ab78e4d5f3b56b5105fd3587c1046d1e1b", features = [ + "default-engine", + "developer-visibility", +] } # arrow arrow = { version = "53" } @@ -48,7 +48,7 @@ arrow-ord = { version = "53" } arrow-row = { version = "53" } arrow-schema = { version = "53" } arrow-select = { version = "53" } -object_store = { version = "0.11.2" , features = ["cloud"]} +object_store = { version = "0.11.2", features = ["cloud"] } parquet = { version = "53" } # datafusion diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index 232cc5f00d..6571371451 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -46,7 +46,7 @@ datafusion-functions-aggregate = { workspace = true, optional = true } # serde serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } -strum = { workspace = true} +strum = { workspace = true } # "stdlib" bytes = { workspace = true } @@ -130,3 +130,7 @@ datafusion = [ datafusion-ext = ["datafusion"] json = ["parquet/json"] python = ["arrow/pyarrow"] + +[[test]] +name = "dat" +harness = false diff --git a/crates/core/tests/dat.rs b/crates/core/tests/dat.rs new file mode 100644 index 0000000000..82daf5c20e --- /dev/null +++ b/crates/core/tests/dat.rs @@ -0,0 +1,99 @@ +use std::path::Path; +use std::sync::Arc; + +use delta_kernel::Table; +use deltalake_core::kernel::snapshot_next::{LazySnapshot, Snapshot}; +use deltalake_test::acceptance::read_dat_case; + +static SKIPPED_TESTS: &[&str; 1] = &["iceberg_compat_v1"]; + +fn reader_test_lazy(path: &Path) -> datatest_stable::Result<()> { + let root_dir = format!( + "{}/{}", + env!["CARGO_MANIFEST_DIR"], + path.parent().unwrap().to_str().unwrap() + ); + for skipped in SKIPPED_TESTS { + if root_dir.ends_with(skipped) { + println!("Skipping test: {}", skipped); + return Ok(()); + } + } + + tokio::runtime::Builder::new_current_thread() + .enable_all() + .build()? + .block_on(async { + let case = read_dat_case(root_dir).unwrap(); + + let table = Table::try_from_uri(case.table_root().unwrap()).expect("table"); + let snapshot = LazySnapshot::try_new( + table, + Arc::new(object_store::local::LocalFileSystem::default()), + None, + ) + .await + .unwrap(); + + let table_info = case.table_summary().expect("load summary"); + assert_eq!(snapshot.version(), table_info.version); + assert_eq!( + ( + snapshot.protocol().min_reader_version(), + snapshot.protocol().min_writer_version() + ), + (table_info.min_reader_version, table_info.min_writer_version) + ); + }); + Ok(()) +} + +fn reader_test_eager(path: &Path) -> datatest_stable::Result<()> { + let root_dir = format!( + "{}/{}", + env!["CARGO_MANIFEST_DIR"], + path.parent().unwrap().to_str().unwrap() + ); + for skipped in SKIPPED_TESTS { + if root_dir.ends_with(skipped) { + println!("Skipping test: {}", skipped); + return Ok(()); + } + } + + tokio::runtime::Builder::new_current_thread() + .enable_all() + .build()? + .block_on(async { + let case = read_dat_case(root_dir).unwrap(); + + let table = Table::try_from_uri(case.table_root().unwrap()).expect("table"); + let snapshot = LazySnapshot::try_new( + table, + Arc::new(object_store::local::LocalFileSystem::default()), + None, + ) + .await + .unwrap(); + + let table_info = case.table_summary().expect("load summary"); + assert_eq!(snapshot.version(), table_info.version); + assert_eq!( + ( + snapshot.protocol().min_reader_version(), + snapshot.protocol().min_writer_version() + ), + (table_info.min_reader_version, table_info.min_writer_version) + ); + }); + Ok(()) +} + +datatest_stable::harness!( + reader_test_lazy, + "../../dat/out/reader_tests/generated/", + r"test_case_info\.json", + reader_test_eager, + "../../dat/out/reader_tests/generated/", + r"test_case_info\.json" +); From 7a559ac0f637854de46475a6ed183827ad762cb6 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Sun, 19 Jan 2025 00:48:34 +0100 Subject: [PATCH 08/23] feat: add commit infos apis to new snapshots Signed-off-by: Robert Pack --- Cargo.toml | 2 +- crates/core/src/kernel/snapshot_next/eager.rs | 20 +++- .../src/kernel/snapshot_next/iterators.rs | 6 +- crates/core/src/kernel/snapshot_next/lazy.rs | 61 +++++++++++- crates/core/src/kernel/snapshot_next/mod.rs | 98 ++++++++++++++++++- 5 files changed, 173 insertions(+), 14 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c3e53c69af..3935efd8c3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,7 +31,7 @@ debug = "line-tables-only" # "default-engine", # "developer-visibility", # ] } -delta_kernel = { git = "https://github.com/roeap/delta-kernel-rs", rev = "caeb70ab78e4d5f3b56b5105fd3587c1046d1e1b", features = [ +delta_kernel = { git = "https://github.com/roeap/delta-kernel-rs", rev = "023abf1ee604b77bbaa5efec97e043fc4bdf220b", features = [ "default-engine", "developer-visibility", ] } diff --git a/crates/core/src/kernel/snapshot_next/eager.rs b/crates/core/src/kernel/snapshot_next/eager.rs index 88306b8e49..a1b0c0d4ca 100644 --- a/crates/core/src/kernel/snapshot_next/eager.rs +++ b/crates/core/src/kernel/snapshot_next/eager.rs @@ -2,7 +2,6 @@ use std::sync::Arc; use arrow::compute::{concat_batches, filter_record_batch}; use arrow_array::{BooleanArray, RecordBatch}; -use chrono::format::Item; use delta_kernel::actions::set_transaction::SetTransactionMap; use delta_kernel::actions::{get_log_add_schema, get_log_schema, ADD_NAME, REMOVE_NAME}; use delta_kernel::actions::{Add, Metadata, Protocol, SetTransaction}; @@ -11,7 +10,7 @@ use delta_kernel::log_segment::LogSegment; use delta_kernel::scan::log_replay::scan_action_iter; use delta_kernel::schema::Schema; use delta_kernel::table_properties::TableProperties; -use delta_kernel::{EngineData, Expression, Table, Version}; +use delta_kernel::{Engine, EngineData, Expression, Table, Version}; use itertools::Itertools; use object_store::ObjectStore; use url::Url; @@ -19,6 +18,7 @@ use url::Url; use super::iterators::{AddIterator, AddView, AddViewItem}; use super::lazy::LazySnapshot; use super::{Snapshot, SnapshotError}; +use crate::kernel::CommitInfo; use crate::{DeltaResult, DeltaTableConfig, DeltaTableError}; /// An eager snapshot of a Delta Table at a specific version. @@ -77,6 +77,14 @@ impl Snapshot for EagerSnapshot { ) -> DeltaResult> { self.snapshot.application_transaction(app_id) } + + fn commit_infos( + &self, + start_version: impl Into>, + limit: impl Into>, + ) -> DeltaResult> { + self.snapshot.commit_infos(start_version, limit) + } } impl EagerSnapshot { @@ -92,7 +100,7 @@ impl EagerSnapshot { LazySnapshot::try_new(Table::try_from_uri(table_root)?, store, version).await?; let files = config .require_files - .then(|| -> DeltaResult<_> { Ok(replay_file_actions(&snapshot)?) }) + .then(|| -> DeltaResult<_> { replay_file_actions(&snapshot) }) .transpose()?; Ok(Self { snapshot, @@ -101,6 +109,10 @@ impl EagerSnapshot { }) } + pub(crate) fn engine_ref(&self) -> &Arc { + self.snapshot.engine_ref() + } + pub fn file_data(&self) -> DeltaResult<&RecordBatch> { Ok(self .files @@ -122,7 +134,7 @@ impl EagerSnapshot { .files .as_ref() .map(|f| f.num_rows()) - .ok_or_else(|| SnapshotError::FilesNotInitialized)?) + .ok_or(SnapshotError::FilesNotInitialized)?) } pub(crate) fn update(&mut self) -> DeltaResult<()> { diff --git a/crates/core/src/kernel/snapshot_next/iterators.rs b/crates/core/src/kernel/snapshot_next/iterators.rs index 4700cb9da3..375fc0061e 100644 --- a/crates/core/src/kernel/snapshot_next/iterators.rs +++ b/crates/core/src/kernel/snapshot_next/iterators.rs @@ -25,7 +25,7 @@ pub struct AddIterator<'a> { } impl AddIterator<'_> { - pub fn try_new<'a>(actions: &'a RecordBatch) -> DeltaResult> { + pub fn try_new(actions: &RecordBatch) -> DeltaResult> { validate_column::(actions, &[ADD_NAME, "path"])?; validate_column::(actions, &[ADD_NAME, "size"])?; validate_column::(actions, &[ADD_NAME, "modificationTime"])?; @@ -108,7 +108,7 @@ pub struct AddViewItem { } impl AddViewItem { - pub fn path(&self) -> &str { + pub fn path(&self) -> &str { extract_column(&self.actions, &[ADD_NAME, "path"]) .unwrap() .as_string::() @@ -273,7 +273,7 @@ fn validate_column<'a, T: Array + 'static>( } } else { return Err(DeltaTableError::from( - crate::protocol::ProtocolError::InvalidField(format!("Column not found",)), + crate::protocol::ProtocolError::InvalidField("Column not found".to_string()), )); } Ok(()) diff --git a/crates/core/src/kernel/snapshot_next/lazy.rs b/crates/core/src/kernel/snapshot_next/lazy.rs index 386d0f63d6..2125b70d93 100644 --- a/crates/core/src/kernel/snapshot_next/lazy.rs +++ b/crates/core/src/kernel/snapshot_next/lazy.rs @@ -1,5 +1,6 @@ //! Snapshot of a Delta Table at a specific version. //! +use std::io::{BufRead, BufReader, Cursor}; use std::sync::{Arc, LazyLock}; use arrow::compute::filter_record_batch; @@ -12,6 +13,7 @@ use delta_kernel::engine::default::executor::tokio::{ TokioBackgroundExecutor, TokioMultiThreadExecutor, }; use delta_kernel::engine::default::DefaultEngine; +use delta_kernel::log_segment::LogSegment; use delta_kernel::schema::Schema; use delta_kernel::snapshot::Snapshot as SnapshotInner; use delta_kernel::table_properties::TableProperties; @@ -23,6 +25,7 @@ use url::Url; use super::cache::CommitCacheObjectStore; use super::Snapshot; +use crate::kernel::{Action, CommitInfo}; use crate::{DeltaResult, DeltaTableError}; // TODO: avoid repetitive parsing of json stats @@ -35,7 +38,7 @@ pub struct LazySnapshot { impl Snapshot for LazySnapshot { fn table_root(&self) -> &Url { - &self.inner.table_root() + self.inner.table_root() } fn version(&self) -> Version { @@ -55,7 +58,7 @@ impl Snapshot for LazySnapshot { } fn table_properties(&self) -> &TableProperties { - &self.inner.table_properties() + self.inner.table_properties() } fn files(&self) -> DeltaResult>> { @@ -96,6 +99,58 @@ impl Snapshot for LazySnapshot { let scanner = SetTransactionScanner::new(self.inner.clone()); Ok(scanner.application_transaction(self.engine.as_ref(), app_id.as_ref())?) } + + fn commit_infos( + &self, + start_version: impl Into>, + limit: impl Into>, + ) -> DeltaResult> { + // let start_version = start_version.into(); + let fs_client = self.engine.get_file_system_client(); + let end_version = start_version.into().unwrap_or_else(|| self.version()); + let start_version = limit + .into() + .and_then(|limit| { + if limit == 0 { + Some(end_version) + } else { + Some(end_version.saturating_sub(limit as u64 - 1)) + } + }) + .unwrap_or(0); + let log_root = self.inner.table_root().join("_delta_log").unwrap(); + let mut log_segment = LogSegment::for_table_changes( + fs_client.as_ref(), + log_root, + start_version, + end_version, + )?; + log_segment.ascending_commit_files.reverse(); + let files = log_segment + .ascending_commit_files + .iter() + .map(|commit_file| (commit_file.location.location.clone(), None)) + .collect_vec(); + + Ok(fs_client + .read_files(files)? + .zip(log_segment.ascending_commit_files.into_iter()) + .filter_map(|(data, path)| { + data.ok().and_then(|d| { + let reader = BufReader::new(Cursor::new(d)); + for line in reader.lines() { + match line.and_then(|l| Ok(serde_json::from_str::(&l)?)) { + Ok(Action::CommitInfo(commit_info)) => { + return Some((path.version, commit_info)) + } + Err(e) => return None, + _ => continue, + }; + } + None + }) + })) + } } impl LazySnapshot { @@ -138,7 +193,7 @@ impl LazySnapshot { } /// A shared reference to the engine used for interacting with the Delta Table. - pub(super) fn engine_ref(&self) -> &Arc { + pub(crate) fn engine_ref(&self) -> &Arc { &self.engine } diff --git a/crates/core/src/kernel/snapshot_next/mod.rs b/crates/core/src/kernel/snapshot_next/mod.rs index 879ef2824a..b02367c3d0 100644 --- a/crates/core/src/kernel/snapshot_next/mod.rs +++ b/crates/core/src/kernel/snapshot_next/mod.rs @@ -4,14 +4,15 @@ use std::sync::Arc; use arrow_array::RecordBatch; use delta_kernel::actions::visitors::SetTransactionMap; -use delta_kernel::actions::{Add, Metadata, Protocol, SetTransaction}; +use delta_kernel::actions::{Metadata, Protocol, SetTransaction}; use delta_kernel::expressions::{Scalar, StructData}; use delta_kernel::schema::Schema; use delta_kernel::table_properties::TableProperties; use delta_kernel::Version; -use iterators::{AddIterator, AddView, AddViewItem}; +use iterators::{AddView, AddViewItem}; use url::Url; +use crate::kernel::actions::CommitInfo; use crate::{DeltaResult, DeltaTableError}; pub use eager::EagerSnapshot; @@ -77,7 +78,7 @@ pub trait Snapshot { fn files_view( &self, ) -> DeltaResult>>> { - Ok(self.files()?.map(|r| r.and_then(|b| AddView::try_new(b)))) + Ok(self.files()?.map(|r| r.and_then(AddView::try_new))) } fn tombstones(&self) -> DeltaResult>>; @@ -93,10 +94,40 @@ pub trait Snapshot { /// /// Initiates a log scan, but terminates as soon as the transaction /// for the given application is found. + /// + /// # Parameters + /// - `app_id`: The application id for which to fetch the transaction. + /// + /// # Returns + /// The latest transaction for the given application id, if it exists. fn application_transaction( &self, app_id: impl AsRef, ) -> DeltaResult>; + + /// Get commit info for the table. + /// + /// The [`CommitInfo`]s are returned in descending order of version + /// with the most recent commit first starting from the `start_version`. + /// + /// [`CommitInfo`]s are read on a best-effort basis. If the action + /// for a version is not available or cannot be parsed, it is skipped. + /// + /// # Parameters + /// - `start_version`: The version from which to start fetching commit info. + /// Defaults to the latest version. + /// - `limit`: The maximum number of commit infos to fetch. + /// + /// # Returns + /// An iterator of commit info tuples. The first element of the tuple is the version + /// of the commit, the second element is the corresponding commit info. + // TODO(roeap): this is currently using our commit info, we should be using + // the definition form kernel, once handling over there matured. + fn commit_infos( + &self, + start_version: impl Into>, + limit: impl Into>, + ) -> DeltaResult>; } impl Snapshot for Arc { @@ -142,6 +173,67 @@ impl Snapshot for Arc { ) -> DeltaResult> { self.as_ref().application_transaction(app_id) } + + fn commit_infos( + &self, + start_version: impl Into>, + limit: impl Into>, + ) -> DeltaResult> { + self.as_ref().commit_infos(start_version, limit) + } +} + +impl Snapshot for Box { + fn table_root(&self) -> &Url { + self.as_ref().table_root() + } + + fn version(&self) -> Version { + self.as_ref().version() + } + + fn schema(&self) -> &Schema { + self.as_ref().schema() + } + + fn metadata(&self) -> &Metadata { + self.as_ref().metadata() + } + + fn protocol(&self) -> &Protocol { + self.as_ref().protocol() + } + + fn table_properties(&self) -> &TableProperties { + self.as_ref().table_properties() + } + + fn files(&self) -> DeltaResult>> { + self.as_ref().files() + } + + fn tombstones(&self) -> DeltaResult>> { + self.as_ref().tombstones() + } + + fn application_transactions(&self) -> DeltaResult { + self.as_ref().application_transactions() + } + + fn application_transaction( + &self, + app_id: impl AsRef, + ) -> DeltaResult> { + self.as_ref().application_transaction(app_id) + } + + fn commit_infos( + &self, + start_version: impl Into>, + limit: impl Into>, + ) -> DeltaResult> { + self.as_ref().commit_infos(start_version, limit) + } } #[cfg(test)] From adb9df8e3ae73aabafc317fe05f6f89be55f43d3 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Mon, 20 Jan 2025 00:48:02 +0100 Subject: [PATCH 09/23] feat: snapshot updates and improved file data iterators Signed-off-by: Robert Pack --- .github/workflows/codecov.yml | 10 +- Cargo.toml | 2 +- crates/core/src/kernel/snapshot_next/eager.rs | 118 +++++++++++------- .../src/kernel/snapshot_next/iterators.rs | 106 +++++++++++----- crates/core/src/kernel/snapshot_next/lazy.rs | 51 ++++---- crates/core/src/kernel/snapshot_next/mod.rs | 116 ++++++++--------- 6 files changed, 234 insertions(+), 169 deletions(-) diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml index 69212c55b0..dbb6fbd0ad 100644 --- a/.github/workflows/codecov.yml +++ b/.github/workflows/codecov.yml @@ -16,17 +16,25 @@ jobs: CARGO_TERM_COLOR: always steps: - uses: actions/checkout@v4 + - name: Install rust uses: actions-rs/toolchain@v1 with: profile: default - toolchain: '1.81' + toolchain: "1.81" override: true + - name: Install cargo-llvm-cov uses: taiki-e/install-action@cargo-llvm-cov + - uses: Swatinem/rust-cache@v2 + + - name: Load DAT data + uses: ./.github/actions/load-dat + - name: Generate code coverage run: cargo llvm-cov --features ${DEFAULT_FEATURES} --workspace --codecov --output-path codecov.json -- --skip read_table_version_hdfs --skip test_read_tables_lakefs + - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 with: diff --git a/Cargo.toml b/Cargo.toml index 3935efd8c3..3981d0f095 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,7 +31,7 @@ debug = "line-tables-only" # "default-engine", # "developer-visibility", # ] } -delta_kernel = { git = "https://github.com/roeap/delta-kernel-rs", rev = "023abf1ee604b77bbaa5efec97e043fc4bdf220b", features = [ +delta_kernel = { git = "https://github.com/roeap/delta-kernel-rs", rev = "2e09bfcc0447283a3acc320ad2350f4075dba83e", features = [ "default-engine", "developer-visibility", ] } diff --git a/crates/core/src/kernel/snapshot_next/eager.rs b/crates/core/src/kernel/snapshot_next/eager.rs index a1b0c0d4ca..16e5073e40 100644 --- a/crates/core/src/kernel/snapshot_next/eager.rs +++ b/crates/core/src/kernel/snapshot_next/eager.rs @@ -10,12 +10,12 @@ use delta_kernel::log_segment::LogSegment; use delta_kernel::scan::log_replay::scan_action_iter; use delta_kernel::schema::Schema; use delta_kernel::table_properties::TableProperties; -use delta_kernel::{Engine, EngineData, Expression, Table, Version}; +use delta_kernel::{Engine, EngineData, Expression, ExpressionRef, Table, Version}; use itertools::Itertools; use object_store::ObjectStore; use url::Url; -use super::iterators::{AddIterator, AddView, AddViewItem}; +use super::iterators::AddIterator; use super::lazy::LazySnapshot; use super::{Snapshot, SnapshotError}; use crate::kernel::CommitInfo; @@ -28,7 +28,6 @@ use crate::{DeltaResult, DeltaTableConfig, DeltaTableError}; pub struct EagerSnapshot { snapshot: LazySnapshot, files: Option, - predicate: Option>, } impl Snapshot for EagerSnapshot { @@ -56,11 +55,15 @@ impl Snapshot for EagerSnapshot { self.snapshot.table_properties() } - fn files(&self) -> DeltaResult>> { - Ok(std::iter::once(Ok(self - .files - .clone() - .ok_or(SnapshotError::FilesNotInitialized)?))) + fn files( + &self, + predicate: impl Into>, + ) -> DeltaResult>> { + Ok(std::iter::once(scan_as_log_data( + &self.snapshot, + vec![(self.file_data()?.clone(), false)], + predicate, + ))) } fn tombstones(&self) -> DeltaResult>> { @@ -85,6 +88,10 @@ impl Snapshot for EagerSnapshot { ) -> DeltaResult> { self.snapshot.commit_infos(start_version, limit) } + + fn update(&mut self, target_version: impl Into>) -> DeltaResult { + self.update_impl(target_version) + } } impl EagerSnapshot { @@ -94,7 +101,6 @@ impl EagerSnapshot { store: Arc, config: DeltaTableConfig, version: impl Into>, - predicate: impl Into>>, ) -> DeltaResult { let snapshot = LazySnapshot::try_new(Table::try_from_uri(table_root)?, store, version).await?; @@ -102,11 +108,7 @@ impl EagerSnapshot { .require_files .then(|| -> DeltaResult<_> { replay_file_actions(&snapshot) }) .transpose()?; - Ok(Self { - snapshot, - files, - predicate: predicate.into(), - }) + Ok(Self { snapshot, files }) } pub(crate) fn engine_ref(&self) -> &Arc { @@ -120,10 +122,6 @@ impl EagerSnapshot { .ok_or(SnapshotError::FilesNotInitialized)?) } - pub fn files(&self) -> DeltaResult> { - AddView::try_new(self.file_data()?.clone()) - } - pub fn file_actions(&self) -> DeltaResult> + '_> { AddIterator::try_new(self.file_data()?) } @@ -137,14 +135,28 @@ impl EagerSnapshot { .ok_or(SnapshotError::FilesNotInitialized)?) } - pub(crate) fn update(&mut self) -> DeltaResult<()> { - let log_root = self.snapshot.table_root().join("_delta_log/").unwrap(); - let fs_client = self.snapshot.engine_ref().get_file_system_client(); + pub(crate) fn update_impl( + &mut self, + target_version: impl Into>, + ) -> DeltaResult { + let target_version = target_version.into(); + + let mut snapshot = self.snapshot.clone(); + if !snapshot.update(target_version.clone())? { + return Ok(false); + } + + let log_root = snapshot.table_root().join("_delta_log/").unwrap(); + let fs_client = snapshot.engine_ref().get_file_system_client(); let commit_read_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME])?; let checkpoint_read_schema = get_log_add_schema().clone(); - let segment = - LogSegment::for_table_changes(fs_client.as_ref(), log_root, self.version() + 1, None)?; + let segment = LogSegment::for_table_changes( + fs_client.as_ref(), + log_root, + self.snapshot.version() + 1, + snapshot.version(), + )?; let mut slice_iter = segment .replay( self.snapshot.engine_ref().as_ref(), @@ -168,9 +180,9 @@ impl EagerSnapshot { false, )); - self.files = Some(scan_as_log_data(&self.snapshot, slice_iter)?); + self.files = Some(scan_as_log_data(&self.snapshot, slice_iter, None)?); - Ok(()) + Ok(true) } } @@ -195,12 +207,13 @@ fn replay_file_actions(snapshot: &LazySnapshot) -> DeltaResult { .flatten() .collect::, _>>()?; - scan_as_log_data(snapshot, curr_data) + scan_as_log_data(snapshot, curr_data, None) } fn scan_as_log_data( snapshot: &LazySnapshot, curr_data: Vec<(RecordBatch, bool)>, + predicate: impl Into>, ) -> Result { let scan_iter = curr_data.clone().into_iter().map(|(data, flag)| { Ok(( @@ -209,24 +222,36 @@ fn scan_as_log_data( )) }); - let res = scan_action_iter(snapshot.engine_ref().as_ref(), scan_iter, None) - .map(|res| { - res.and_then(|(d, selection)| { - Ok(( - RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?), - selection, - )) - }) - }) - .zip(curr_data.into_iter()) - .map(|(scan_res, (data_raw, _))| match scan_res { - Ok((_, selection)) => { - let data = filter_record_batch(&data_raw, &BooleanArray::from(selection))?; - Ok(data.project(&[0])?) - } - Err(e) => Err(e), + let scan = snapshot + .inner + .clone() + .scan_builder() + .with_predicate(predicate) + .build()?; + + let res = scan_action_iter( + snapshot.engine_ref().as_ref(), + scan_iter, + scan.physical_predicate() + .map(|p| (p, scan.schema().clone())), + ) + .map(|res| { + res.and_then(|(d, selection)| { + Ok(( + RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?), + selection, + )) }) - .collect::, _>>()?; + }) + .zip(curr_data.into_iter()) + .map(|(scan_res, (data_raw, _))| match scan_res { + Ok((_, selection)) => { + let data = filter_record_batch(&data_raw, &BooleanArray::from(selection))?; + Ok(data.project(&[0])?) + } + Err(e) => Err(e), + }) + .collect::, _>>()?; Ok(concat_batches(res[0].schema_ref(), &res)?) } @@ -253,18 +278,19 @@ mod tests { table.location(), Arc::new(object_store::local::LocalFileSystem::default()), Default::default(), - Some(1), - None, + 0, ) .await?; + println!("before update"); + // assert_eq!(snapshot.version(), table_info.version); // assert_eq!( // snapshot.protocol().min_reader_version(), // table_info.min_reader_version // ); - snapshot.update()?; + snapshot.update(None)?; for file in snapshot.file_actions()? { println!("file: {:#?}", file.unwrap()); diff --git a/crates/core/src/kernel/snapshot_next/iterators.rs b/crates/core/src/kernel/snapshot_next/iterators.rs index 375fc0061e..9a01e24254 100644 --- a/crates/core/src/kernel/snapshot_next/iterators.rs +++ b/crates/core/src/kernel/snapshot_next/iterators.rs @@ -4,7 +4,8 @@ use std::sync::Arc; use arrow_array::cast::AsArray; use arrow_array::types::Int64Type; use arrow_array::{ - Array, ArrayRef, BooleanArray, Int64Array, RecordBatch, StringArray, StructArray, + Array, ArrayRef, BooleanArray, Int64Array, RecordBatch, RecordBatchReader, StringArray, + StructArray, }; use chrono::{DateTime, Utc}; use delta_kernel::actions::visitors::AddVisitor; @@ -76,38 +77,6 @@ pub struct AddView { } impl AddView { - pub fn try_new(actions: RecordBatch) -> DeltaResult { - validate_column::(&actions, &[ADD_NAME, "path"])?; - validate_column::(&actions, &[ADD_NAME, "size"])?; - validate_column::(&actions, &[ADD_NAME, "modificationTime"])?; - validate_column::(&actions, &[ADD_NAME, "dataChange"])?; - Ok(Self { actions, index: 0 }) - } -} - -impl Iterator for AddView { - type Item = AddViewItem; - - fn next(&mut self) -> Option { - if self.index < self.actions.num_rows() { - let add = AddViewItem { - actions: self.actions.clone(), - index: self.index, - }; - self.index += 1; - Some(add) - } else { - None - } - } -} - -pub struct AddViewItem { - actions: RecordBatch, - index: usize, -} - -impl AddViewItem { pub fn path(&self) -> &str { extract_column(&self.actions, &[ADD_NAME, "path"]) .unwrap() @@ -253,6 +222,77 @@ impl Iterator for LogicalFileView { } } +pub struct AddViewIterator +where + I: IntoIterator>, +{ + inner: I::IntoIter, + batch: Option, + current: usize, +} + +impl AddViewIterator +where + I: IntoIterator>, +{ + /// Create a new [AddViewIterator]. + /// + /// If `iter` is an infallible iterator, use `.map(Ok)`. + pub fn new(iter: I) -> Self { + Self { + inner: iter.into_iter(), + batch: None, + current: 0, + } + } +} + +impl Iterator for AddViewIterator +where + I: IntoIterator>, +{ + type Item = DeltaResult; + + fn next(&mut self) -> Option { + if let Some(batch) = &self.batch { + if self.current < batch.num_rows() { + let item = AddView { + actions: batch.clone(), + index: self.current, + }; + self.current += 1; + return Some(Ok(item)); + } + } + match self.inner.next() { + Some(Ok(batch)) => { + if validate_add(&batch).is_err() { + return Some(Err(DeltaTableError::generic( + "Invalid add action data encountered.", + ))); + } + self.batch = Some(batch); + self.current = 0; + self.next() + } + Some(Err(e)) => Some(Err(e)), + None => None, + } + } + + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } +} + +pub(crate) fn validate_add(batch: &RecordBatch) -> DeltaResult<()> { + validate_column::(batch, &[ADD_NAME, "path"])?; + validate_column::(batch, &[ADD_NAME, "size"])?; + validate_column::(batch, &[ADD_NAME, "modificationTime"])?; + validate_column::(batch, &[ADD_NAME, "dataChange"])?; + Ok(()) +} + fn validate_column<'a, T: Array + 'static>( actions: &'a RecordBatch, col: &'a [impl AsRef], diff --git a/crates/core/src/kernel/snapshot_next/lazy.rs b/crates/core/src/kernel/snapshot_next/lazy.rs index 2125b70d93..1a360d8371 100644 --- a/crates/core/src/kernel/snapshot_next/lazy.rs +++ b/crates/core/src/kernel/snapshot_next/lazy.rs @@ -61,9 +61,24 @@ impl Snapshot for LazySnapshot { self.inner.table_properties() } - fn files(&self) -> DeltaResult>> { - Ok(self - .files_impl(None)? + fn files( + &self, + predicate: impl Into>>, + ) -> DeltaResult>> { + let scan = self + .inner + .clone() + .scan_builder() + .with_predicate(predicate) + .build()?; + Ok(scan + .scan_data(self.engine.as_ref())? + .map(|res| { + res.and_then(|(data, predicate)| { + let batch: RecordBatch = ArrowEngineData::try_from_engine_data(data)?.into(); + Ok(filter_record_batch(&batch, &BooleanArray::from(predicate))?) + }) + }) .map(|batch| batch.map_err(|e| e.into()))) } @@ -151,6 +166,13 @@ impl Snapshot for LazySnapshot { }) })) } + + fn update(&mut self, target_version: impl Into>) -> DeltaResult { + let mut snapshot = self.inner.as_ref().clone(); + let did_update = snapshot.update(target_version, self.engine_ref().as_ref())?; + self.inner = Arc::new(snapshot); + Ok(did_update) + } } impl LazySnapshot { @@ -210,29 +232,6 @@ impl LazySnapshot { .find(|f| f.version == version) .map(|f| f.location.last_modified) } - - /// read all active files from the log - fn files_impl( - &self, - predicate: impl Into>>, - ) -> DeltaResult>> { - let scan = self - .inner - .clone() - .scan_builder() - .with_predicate(predicate) - .build()?; - Ok(scan.scan_data(self.engine.as_ref())?.map(|res| { - res.and_then(|(data, mut predicate)| { - let batch: RecordBatch = ArrowEngineData::try_from_engine_data(data)?.into(); - if predicate.len() < batch.num_rows() { - predicate - .extend(std::iter::repeat(true).take(batch.num_rows() - predicate.len())); - } - Ok(filter_record_batch(&batch, &BooleanArray::from(predicate))?) - }) - })) - } } #[cfg(test)] diff --git a/crates/core/src/kernel/snapshot_next/mod.rs b/crates/core/src/kernel/snapshot_next/mod.rs index b02367c3d0..9acd55494d 100644 --- a/crates/core/src/kernel/snapshot_next/mod.rs +++ b/crates/core/src/kernel/snapshot_next/mod.rs @@ -1,15 +1,13 @@ //! Snapshot of a Delta Table at a specific version. -//! -use std::sync::Arc; use arrow_array::RecordBatch; use delta_kernel::actions::visitors::SetTransactionMap; -use delta_kernel::actions::{Metadata, Protocol, SetTransaction}; +use delta_kernel::actions::{Add, Metadata, Protocol, SetTransaction}; use delta_kernel::expressions::{Scalar, StructData}; use delta_kernel::schema::Schema; use delta_kernel::table_properties::TableProperties; -use delta_kernel::Version; -use iterators::{AddView, AddViewItem}; +use delta_kernel::{ExpressionRef, Version}; +use iterators::{AddView, AddViewIterator}; use url::Url; use crate::kernel::actions::CommitInfo; @@ -54,6 +52,10 @@ impl StructDataExt for StructData { } } +/// In-memory representation of a specific snapshot of a Delta table. While a `DeltaTable` exists +/// throughout time, `Snapshot`s represent a view of a table at a specific point in time; they +/// have a defined schema (which may change over time for any given table), specific version, and +/// frozen log segment. pub trait Snapshot { /// Location where the Delta Table (metadata) is stored. fn table_root(&self) -> &Url; @@ -65,22 +67,47 @@ pub trait Snapshot { fn schema(&self) -> &Schema; /// Table [`Metadata`] at this `Snapshot`s version. + /// + /// Metadata contains information about the table, such as the table name, + /// the schema, the partition columns, the configuration, etc. fn metadata(&self) -> &Metadata; /// Table [`Protocol`] at this `Snapshot`s version. + /// + /// The protocol indicates the min reader / writer version required to + /// read / write the table. For modern readers / writers, the reader / + /// writer features active in the table are also available. fn protocol(&self) -> &Protocol; /// Get the [`TableProperties`] for this [`Snapshot`]. fn table_properties(&self) -> &TableProperties; - fn files(&self) -> DeltaResult>>; + /// Get all currently active files in the table. + /// + /// # Parameters + /// - `predicate`: An optional predicate to filter the files based on file statistics. + /// + /// # Returns + /// An iterator of [`RecordBatch`]es, where each batch contains add action data. + fn files( + &self, + predicate: impl Into>, + ) -> DeltaResult>>; fn files_view( &self, - ) -> DeltaResult>>> { - Ok(self.files()?.map(|r| r.and_then(AddView::try_new))) + predicate: impl Into>, + ) -> DeltaResult>> { + Ok(AddViewIterator::new(self.files(predicate)?)) } + /// Get all tombstones in the table. + /// + /// Remove Actions (tombstones) are records that indicate that a file has been deleted. + /// They are returned mostly for the purposes of VACUUM operations. + /// + /// # Returns + /// An iterator of [`RecordBatch`]es, where each batch contains remove action data. fn tombstones(&self) -> DeltaResult>>; /// Scan the Delta Log to obtain the latest transaction for all applications @@ -128,59 +155,17 @@ pub trait Snapshot { start_version: impl Into>, limit: impl Into>, ) -> DeltaResult>; -} - -impl Snapshot for Arc { - fn table_root(&self) -> &Url { - self.as_ref().table_root() - } - - fn version(&self) -> Version { - self.as_ref().version() - } - fn schema(&self) -> &Schema { - self.as_ref().schema() - } - - fn metadata(&self) -> &Metadata { - self.as_ref().metadata() - } - - fn protocol(&self) -> &Protocol { - self.as_ref().protocol() - } - - fn table_properties(&self) -> &TableProperties { - self.as_ref().table_properties() - } - - fn files(&self) -> DeltaResult>> { - self.as_ref().files() - } - - fn tombstones(&self) -> DeltaResult>> { - self.as_ref().tombstones() - } - - fn application_transactions(&self) -> DeltaResult { - self.as_ref().application_transactions() - } - - fn application_transaction( - &self, - app_id: impl AsRef, - ) -> DeltaResult> { - self.as_ref().application_transaction(app_id) - } - - fn commit_infos( - &self, - start_version: impl Into>, - limit: impl Into>, - ) -> DeltaResult> { - self.as_ref().commit_infos(start_version, limit) - } + /// Update the snapshot to a specific version. + /// + /// The target version must be greater then the current version of the snapshot. + /// + /// # Parameters + /// - `target_version`: The version to update the snapshot to. Defaults to latest. + /// + /// # Returns + /// A boolean indicating if the snapshot was updated. + fn update(&mut self, target_version: impl Into>) -> DeltaResult; } impl Snapshot for Box { @@ -208,8 +193,11 @@ impl Snapshot for Box { self.as_ref().table_properties() } - fn files(&self) -> DeltaResult>> { - self.as_ref().files() + fn files( + &self, + predicate: impl Into>, + ) -> DeltaResult>> { + self.as_ref().files(predicate) } fn tombstones(&self) -> DeltaResult>> { @@ -234,6 +222,10 @@ impl Snapshot for Box { ) -> DeltaResult> { self.as_ref().commit_infos(start_version, limit) } + + fn update(&mut self, target_version: impl Into>) -> DeltaResult { + self.as_mut().update(target_version) + } } #[cfg(test)] From f3b0edb08389dc4814670be2364ccc27b3a91f6d Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Tue, 21 Jan 2025 15:02:11 +0100 Subject: [PATCH 10/23] fix: cocnsistent schemas in file replay and object safe snapshot trait Signed-off-by: Robert Pack --- Cargo.toml | 10 +- crates/core/src/kernel/snapshot_next/eager.rs | 171 ++---------- .../src/kernel/snapshot_next/iterators.rs | 8 +- crates/core/src/kernel/snapshot_next/lazy.rs | 117 ++++---- crates/core/src/kernel/snapshot_next/mod.rs | 259 ++++++++++++++++-- 5 files changed, 327 insertions(+), 238 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 3981d0f095..0bbe1e07ab 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,14 +27,14 @@ debug = "line-tables-only" [workspace.dependencies] # delta_kernel = { version = "=0.6.0", features = ["default-engine"] } -# delta_kernel = { path = "../delta-kernel-rs/kernel", features = [ -# "default-engine", -# "developer-visibility", -# ] } -delta_kernel = { git = "https://github.com/roeap/delta-kernel-rs", rev = "2e09bfcc0447283a3acc320ad2350f4075dba83e", features = [ +delta_kernel = { path = "../delta-kernel-rs/kernel", features = [ "default-engine", "developer-visibility", ] } +# delta_kernel = { git = "https://github.com/roeap/delta-kernel-rs", rev = "2e09bfcc0447283a3acc320ad2350f4075dba83e", features = [ +# "default-engine", +# "developer-visibility", +# ] } # arrow arrow = { version = "53" } diff --git a/crates/core/src/kernel/snapshot_next/eager.rs b/crates/core/src/kernel/snapshot_next/eager.rs index 16e5073e40..83e0a00863 100644 --- a/crates/core/src/kernel/snapshot_next/eager.rs +++ b/crates/core/src/kernel/snapshot_next/eager.rs @@ -1,23 +1,21 @@ use std::sync::Arc; -use arrow::compute::{concat_batches, filter_record_batch}; -use arrow_array::{BooleanArray, RecordBatch}; +use arrow_array::RecordBatch; use delta_kernel::actions::set_transaction::SetTransactionMap; use delta_kernel::actions::{get_log_add_schema, get_log_schema, ADD_NAME, REMOVE_NAME}; use delta_kernel::actions::{Add, Metadata, Protocol, SetTransaction}; use delta_kernel::engine::arrow_data::ArrowEngineData; use delta_kernel::log_segment::LogSegment; -use delta_kernel::scan::log_replay::scan_action_iter; use delta_kernel::schema::Schema; use delta_kernel::table_properties::TableProperties; -use delta_kernel::{Engine, EngineData, Expression, ExpressionRef, Table, Version}; +use delta_kernel::{ExpressionRef, Table, Version}; use itertools::Itertools; use object_store::ObjectStore; use url::Url; use super::iterators::AddIterator; use super::lazy::LazySnapshot; -use super::{Snapshot, SnapshotError}; +use super::{replay_file_actions, scan_as_log_data, Snapshot, SnapshotError}; use crate::kernel::CommitInfo; use crate::{DeltaResult, DeltaTableConfig, DeltaTableError}; @@ -55,18 +53,25 @@ impl Snapshot for EagerSnapshot { self.snapshot.table_properties() } + fn logical_files( + &self, + _predicate: Option, + ) -> DeltaResult>>> { + todo!() + } + fn files( &self, - predicate: impl Into>, - ) -> DeltaResult>> { - Ok(std::iter::once(scan_as_log_data( + predicate: Option, + ) -> DeltaResult>>> { + Ok(Box::new(std::iter::once(scan_as_log_data( &self.snapshot, vec![(self.file_data()?.clone(), false)], predicate, - ))) + )))) } - fn tombstones(&self) -> DeltaResult>> { + fn tombstones(&self) -> DeltaResult>>> { self.snapshot.tombstones() } @@ -74,22 +79,19 @@ impl Snapshot for EagerSnapshot { self.snapshot.application_transactions() } - fn application_transaction( - &self, - app_id: impl AsRef, - ) -> DeltaResult> { + fn application_transaction(&self, app_id: &str) -> DeltaResult> { self.snapshot.application_transaction(app_id) } fn commit_infos( &self, - start_version: impl Into>, - limit: impl Into>, - ) -> DeltaResult> { + start_version: Option, + limit: Option, + ) -> DeltaResult>> { self.snapshot.commit_infos(start_version, limit) } - fn update(&mut self, target_version: impl Into>) -> DeltaResult { + fn update(&mut self, target_version: Option) -> DeltaResult { self.update_impl(target_version) } } @@ -106,15 +108,11 @@ impl EagerSnapshot { LazySnapshot::try_new(Table::try_from_uri(table_root)?, store, version).await?; let files = config .require_files - .then(|| -> DeltaResult<_> { replay_file_actions(&snapshot) }) + .then(|| -> DeltaResult<_> { replay_file_actions(&snapshot, None) }) .transpose()?; Ok(Self { snapshot, files }) } - pub(crate) fn engine_ref(&self) -> &Arc { - self.snapshot.engine_ref() - } - pub fn file_data(&self) -> DeltaResult<&RecordBatch> { Ok(self .files @@ -135,18 +133,16 @@ impl EagerSnapshot { .ok_or(SnapshotError::FilesNotInitialized)?) } - pub(crate) fn update_impl( - &mut self, - target_version: impl Into>, - ) -> DeltaResult { - let target_version = target_version.into(); - + fn update_impl(&mut self, target_version: Option) -> DeltaResult { let mut snapshot = self.snapshot.clone(); if !snapshot.update(target_version.clone())? { return Ok(false); } - let log_root = snapshot.table_root().join("_delta_log/").unwrap(); + let log_root = snapshot + .table_root() + .join("_delta_log/") + .map_err(|e| DeltaTableError::generic(e))?; let fs_client = snapshot.engine_ref().get_file_system_client(); let commit_read_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME])?; let checkpoint_read_schema = get_log_add_schema().clone(); @@ -181,121 +177,8 @@ impl EagerSnapshot { )); self.files = Some(scan_as_log_data(&self.snapshot, slice_iter, None)?); + self.snapshot = snapshot; Ok(true) } } - -fn replay_file_actions(snapshot: &LazySnapshot) -> DeltaResult { - let commit_read_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME])?; - let checkpoint_read_schema = get_log_add_schema().clone(); - - let curr_data = snapshot - .inner - ._log_segment() - .replay( - snapshot.engine_ref().as_ref(), - commit_read_schema.clone(), - checkpoint_read_schema.clone(), - None, - )? - .map_ok( - |(data, flag)| -> Result<(RecordBatch, bool), delta_kernel::Error> { - Ok((ArrowEngineData::try_from_engine_data(data)?.into(), flag)) - }, - ) - .flatten() - .collect::, _>>()?; - - scan_as_log_data(snapshot, curr_data, None) -} - -fn scan_as_log_data( - snapshot: &LazySnapshot, - curr_data: Vec<(RecordBatch, bool)>, - predicate: impl Into>, -) -> Result { - let scan_iter = curr_data.clone().into_iter().map(|(data, flag)| { - Ok(( - Box::new(ArrowEngineData::new(data.clone())) as Box, - flag, - )) - }); - - let scan = snapshot - .inner - .clone() - .scan_builder() - .with_predicate(predicate) - .build()?; - - let res = scan_action_iter( - snapshot.engine_ref().as_ref(), - scan_iter, - scan.physical_predicate() - .map(|p| (p, scan.schema().clone())), - ) - .map(|res| { - res.and_then(|(d, selection)| { - Ok(( - RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?), - selection, - )) - }) - }) - .zip(curr_data.into_iter()) - .map(|(scan_res, (data_raw, _))| match scan_res { - Ok((_, selection)) => { - let data = filter_record_batch(&data_raw, &BooleanArray::from(selection))?; - Ok(data.project(&[0])?) - } - Err(e) => Err(e), - }) - .collect::, _>>()?; - - Ok(concat_batches(res[0].schema_ref(), &res)?) -} - -#[cfg(test)] -mod tests { - use deltalake_test::acceptance::{read_dat_case, TestCaseInfo}; - use deltalake_test::TestResult; - - use super::super::tests::get_dat_dir; - use super::*; - - #[tokio::test] - async fn load_eager_snapshot() -> TestResult<()> { - let mut dat_dir = get_dat_dir(); - dat_dir.push("multi_partitioned"); - - let dat_info: TestCaseInfo = read_dat_case(dat_dir)?; - let table_info = dat_info.table_summary()?; - - let table = Table::try_from_uri(dat_info.table_root()?)?; - - let mut snapshot = EagerSnapshot::try_new( - table.location(), - Arc::new(object_store::local::LocalFileSystem::default()), - Default::default(), - 0, - ) - .await?; - - println!("before update"); - - // assert_eq!(snapshot.version(), table_info.version); - // assert_eq!( - // snapshot.protocol().min_reader_version(), - // table_info.min_reader_version - // ); - - snapshot.update(None)?; - - for file in snapshot.file_actions()? { - println!("file: {:#?}", file.unwrap()); - } - - Ok(()) - } -} diff --git a/crates/core/src/kernel/snapshot_next/iterators.rs b/crates/core/src/kernel/snapshot_next/iterators.rs index 9a01e24254..38aa0e1c2f 100644 --- a/crates/core/src/kernel/snapshot_next/iterators.rs +++ b/crates/core/src/kernel/snapshot_next/iterators.rs @@ -4,8 +4,7 @@ use std::sync::Arc; use arrow_array::cast::AsArray; use arrow_array::types::Int64Type; use arrow_array::{ - Array, ArrayRef, BooleanArray, Int64Array, RecordBatch, RecordBatchReader, StringArray, - StructArray, + Array, ArrayRef, BooleanArray, Int64Array, RecordBatch, StringArray, StructArray, }; use chrono::{DateTime, Utc}; use delta_kernel::actions::visitors::AddVisitor; @@ -27,10 +26,7 @@ pub struct AddIterator<'a> { impl AddIterator<'_> { pub fn try_new(actions: &RecordBatch) -> DeltaResult> { - validate_column::(actions, &[ADD_NAME, "path"])?; - validate_column::(actions, &[ADD_NAME, "size"])?; - validate_column::(actions, &[ADD_NAME, "modificationTime"])?; - validate_column::(actions, &[ADD_NAME, "dataChange"])?; + validate_add(&actions)?; let visitor = AddVisitor::new(); let fields = visitor.selected_column_names_and_types(); diff --git a/crates/core/src/kernel/snapshot_next/lazy.rs b/crates/core/src/kernel/snapshot_next/lazy.rs index 1a360d8371..572b4ead37 100644 --- a/crates/core/src/kernel/snapshot_next/lazy.rs +++ b/crates/core/src/kernel/snapshot_next/lazy.rs @@ -24,7 +24,7 @@ use object_store::ObjectStore; use url::Url; use super::cache::CommitCacheObjectStore; -use super::Snapshot; +use super::{replay_file_actions, Snapshot}; use crate::kernel::{Action, CommitInfo}; use crate::{DeltaResult, DeltaTableError}; @@ -61,45 +61,57 @@ impl Snapshot for LazySnapshot { self.inner.table_properties() } - fn files( + fn logical_files( &self, - predicate: impl Into>>, - ) -> DeltaResult>> { + predicate: Option, + ) -> DeltaResult>>> { let scan = self .inner .clone() .scan_builder() .with_predicate(predicate) .build()?; - Ok(scan - .scan_data(self.engine.as_ref())? - .map(|res| { - res.and_then(|(data, predicate)| { - let batch: RecordBatch = ArrowEngineData::try_from_engine_data(data)?.into(); - Ok(filter_record_batch(&batch, &BooleanArray::from(predicate))?) + Ok(Box::new( + scan.scan_data(self.engine.as_ref())? + .map(|res| { + res.and_then(|(data, predicate)| { + let batch: RecordBatch = + ArrowEngineData::try_from_engine_data(data)?.into(); + Ok(filter_record_batch(&batch, &BooleanArray::from(predicate))?) + }) }) - }) - .map(|batch| batch.map_err(|e| e.into()))) + .map(|batch| batch.map_err(|e| e.into())), + )) + } + + fn files( + &self, + predicate: Option>, + ) -> DeltaResult>>> { + Ok(Box::new(std::iter::once(replay_file_actions( + &self, predicate, + )))) } - fn tombstones(&self) -> DeltaResult>> { + fn tombstones(&self) -> DeltaResult>>> { static META_PREDICATE: LazyLock> = LazyLock::new(|| { Some(Arc::new( Expression::column([REMOVE_NAME, "path"]).is_not_null(), )) }); let read_schema = get_log_schema().project(&[REMOVE_NAME])?; - Ok(self - .inner - ._log_segment() - .replay( - self.engine.as_ref(), - read_schema.clone(), - read_schema, - META_PREDICATE.clone(), - )? - .map_ok(|(d, _)| Ok(RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?))) - .flatten()) + Ok(Box::new( + self.inner + ._log_segment() + .replay( + self.engine.as_ref(), + read_schema.clone(), + read_schema, + META_PREDICATE.clone(), + )? + .map_ok(|(d, _)| Ok(RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?))) + .flatten(), + )) } fn application_transactions(&self) -> DeltaResult { @@ -107,24 +119,20 @@ impl Snapshot for LazySnapshot { Ok(scanner.application_transactions(self.engine.as_ref())?) } - fn application_transaction( - &self, - app_id: impl AsRef, - ) -> DeltaResult> { + fn application_transaction(&self, app_id: &str) -> DeltaResult> { let scanner = SetTransactionScanner::new(self.inner.clone()); - Ok(scanner.application_transaction(self.engine.as_ref(), app_id.as_ref())?) + Ok(scanner.application_transaction(self.engine.as_ref(), app_id)?) } fn commit_infos( &self, - start_version: impl Into>, - limit: impl Into>, - ) -> DeltaResult> { + start_version: Option, + limit: Option, + ) -> DeltaResult>> { // let start_version = start_version.into(); let fs_client = self.engine.get_file_system_client(); - let end_version = start_version.into().unwrap_or_else(|| self.version()); + let end_version = start_version.unwrap_or_else(|| self.version()); let start_version = limit - .into() .and_then(|limit| { if limit == 0 { Some(end_version) @@ -133,6 +141,7 @@ impl Snapshot for LazySnapshot { } }) .unwrap_or(0); + let log_root = self.inner.table_root().join("_delta_log").unwrap(); let mut log_segment = LogSegment::for_table_changes( fs_client.as_ref(), @@ -147,27 +156,29 @@ impl Snapshot for LazySnapshot { .map(|commit_file| (commit_file.location.location.clone(), None)) .collect_vec(); - Ok(fs_client - .read_files(files)? - .zip(log_segment.ascending_commit_files.into_iter()) - .filter_map(|(data, path)| { - data.ok().and_then(|d| { - let reader = BufReader::new(Cursor::new(d)); - for line in reader.lines() { - match line.and_then(|l| Ok(serde_json::from_str::(&l)?)) { - Ok(Action::CommitInfo(commit_info)) => { - return Some((path.version, commit_info)) - } - Err(e) => return None, - _ => continue, - }; - } - None - }) - })) + Ok(Box::new( + fs_client + .read_files(files)? + .zip(log_segment.ascending_commit_files.into_iter()) + .filter_map(|(data, path)| { + data.ok().and_then(|d| { + let reader = BufReader::new(Cursor::new(d)); + for line in reader.lines() { + match line.and_then(|l| Ok(serde_json::from_str::(&l)?)) { + Ok(Action::CommitInfo(commit_info)) => { + return Some((path.version, commit_info)) + } + Err(_) => return None, + _ => continue, + }; + } + None + }) + }), + )) } - fn update(&mut self, target_version: impl Into>) -> DeltaResult { + fn update(&mut self, target_version: Option) -> DeltaResult { let mut snapshot = self.inner.as_ref().clone(); let did_update = snapshot.update(target_version, self.engine_ref().as_ref())?; self.inner = Arc::new(snapshot); diff --git a/crates/core/src/kernel/snapshot_next/mod.rs b/crates/core/src/kernel/snapshot_next/mod.rs index 9acd55494d..817d94e477 100644 --- a/crates/core/src/kernel/snapshot_next/mod.rs +++ b/crates/core/src/kernel/snapshot_next/mod.rs @@ -1,13 +1,23 @@ //! Snapshot of a Delta Table at a specific version. -use arrow_array::RecordBatch; +use std::sync::Arc; + +use arrow_array::{BooleanArray, RecordBatch, StructArray}; +use arrow_select::concat::concat_batches; +use arrow_select::filter::filter_record_batch; use delta_kernel::actions::visitors::SetTransactionMap; -use delta_kernel::actions::{Add, Metadata, Protocol, SetTransaction}; +use delta_kernel::actions::{ + get_log_add_schema, get_log_schema, Metadata, Protocol, SetTransaction, ADD_NAME, REMOVE_NAME, +}; +use delta_kernel::engine::arrow_data::ArrowEngineData; +use delta_kernel::engine::arrow_expression::apply_schema; use delta_kernel::expressions::{Scalar, StructData}; -use delta_kernel::schema::Schema; +use delta_kernel::scan::log_replay::scan_action_iter; +use delta_kernel::schema::{DataType, Schema}; use delta_kernel::table_properties::TableProperties; -use delta_kernel::{ExpressionRef, Version}; +use delta_kernel::{EngineData, ExpressionRef, Version}; use iterators::{AddView, AddViewIterator}; +use itertools::Itertools; use url::Url; use crate::kernel::actions::CommitInfo; @@ -91,14 +101,19 @@ pub trait Snapshot { /// An iterator of [`RecordBatch`]es, where each batch contains add action data. fn files( &self, - predicate: impl Into>, - ) -> DeltaResult>>; + predicate: Option, + ) -> DeltaResult>>>; + + fn logical_files( + &self, + predicate: Option, + ) -> DeltaResult>>>; fn files_view( &self, - predicate: impl Into>, - ) -> DeltaResult>> { - Ok(AddViewIterator::new(self.files(predicate)?)) + predicate: Option, + ) -> DeltaResult>>> { + Ok(Box::new(AddViewIterator::new(self.files(predicate)?))) } /// Get all tombstones in the table. @@ -108,7 +123,7 @@ pub trait Snapshot { /// /// # Returns /// An iterator of [`RecordBatch`]es, where each batch contains remove action data. - fn tombstones(&self) -> DeltaResult>>; + fn tombstones(&self) -> DeltaResult>>>; /// Scan the Delta Log to obtain the latest transaction for all applications /// @@ -127,10 +142,7 @@ pub trait Snapshot { /// /// # Returns /// The latest transaction for the given application id, if it exists. - fn application_transaction( - &self, - app_id: impl AsRef, - ) -> DeltaResult>; + fn application_transaction(&self, app_id: &str) -> DeltaResult>; /// Get commit info for the table. /// @@ -152,9 +164,9 @@ pub trait Snapshot { // the definition form kernel, once handling over there matured. fn commit_infos( &self, - start_version: impl Into>, - limit: impl Into>, - ) -> DeltaResult>; + start_version: Option, + limit: Option, + ) -> DeltaResult>>; /// Update the snapshot to a specific version. /// @@ -165,7 +177,7 @@ pub trait Snapshot { /// /// # Returns /// A boolean indicating if the snapshot was updated. - fn update(&mut self, target_version: impl Into>) -> DeltaResult; + fn update(&mut self, target_version: Option) -> DeltaResult; } impl Snapshot for Box { @@ -193,14 +205,21 @@ impl Snapshot for Box { self.as_ref().table_properties() } + fn logical_files( + &self, + predicate: Option, + ) -> DeltaResult>>> { + self.as_ref().logical_files(predicate) + } + fn files( &self, - predicate: impl Into>, - ) -> DeltaResult>> { + predicate: Option, + ) -> DeltaResult>>> { self.as_ref().files(predicate) } - fn tombstones(&self) -> DeltaResult>> { + fn tombstones(&self) -> DeltaResult>>> { self.as_ref().tombstones() } @@ -208,29 +227,114 @@ impl Snapshot for Box { self.as_ref().application_transactions() } - fn application_transaction( - &self, - app_id: impl AsRef, - ) -> DeltaResult> { + fn application_transaction(&self, app_id: &str) -> DeltaResult> { self.as_ref().application_transaction(app_id) } fn commit_infos( &self, - start_version: impl Into>, - limit: impl Into>, - ) -> DeltaResult> { + start_version: Option, + limit: Option, + ) -> DeltaResult>> { self.as_ref().commit_infos(start_version, limit) } - fn update(&mut self, target_version: impl Into>) -> DeltaResult { + fn update(&mut self, target_version: Option) -> DeltaResult { self.as_mut().update(target_version) } } +fn replay_file_actions( + snapshot: &LazySnapshot, + predicate: impl Into>, +) -> DeltaResult { + let commit_read_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME])?; + let checkpoint_read_schema = get_log_add_schema().clone(); + + let curr_data = snapshot + .inner + ._log_segment() + .replay( + snapshot.engine_ref().as_ref(), + commit_read_schema.clone(), + checkpoint_read_schema.clone(), + None, + )? + .map_ok( + |(data, flag)| -> Result<(RecordBatch, bool), delta_kernel::Error> { + Ok((ArrowEngineData::try_from_engine_data(data)?.into(), flag)) + }, + ) + .flatten() + .collect::, _>>()?; + + scan_as_log_data(snapshot, curr_data, predicate) +} + +// helper function to replay log data as stored using kernel log replay. +// The kernel replay usually emits a tuple of (data, selection) where data is the +// data is a re-ordered subset of the full data in the log which is relevant to the +// engine. this function leverages the replay, but applies the selection to the +// original data to get the final data. +fn scan_as_log_data( + snapshot: &LazySnapshot, + curr_data: impl IntoIterator, + predicate: impl Into>, +) -> Result { + let curr_data = curr_data.into_iter().collect::>(); + let scan_iter = curr_data.clone().into_iter().map(|(data, flag)| { + Ok(( + Box::new(ArrowEngineData::new(data.clone())) as Box, + flag, + )) + }); + + let scan = snapshot + .inner + .as_ref() + .clone() + .into_scan_builder() + .with_predicate(predicate) + .build()?; + + let res = scan_action_iter( + snapshot.engine_ref().as_ref(), + scan_iter, + scan.physical_predicate() + .map(|p| (p, scan.schema().clone())), + ) + .map(|res| { + res.and_then(|(d, selection)| { + Ok(( + RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?), + selection, + )) + }) + }) + .zip(curr_data.into_iter()) + .map(|(scan_res, (data_raw, _))| match scan_res { + Ok((_, selection)) => { + let data = filter_record_batch(&data_raw, &BooleanArray::from(selection))?; + let dt: DataType = get_log_add_schema().as_ref().clone().into(); + let data: StructArray = data.project(&[0])?.into(); + apply_schema(&data, &dt) + } + Err(e) => Err(e), + }) + .collect::, _>>()?; + + let schema_ref = Arc::new(get_log_add_schema().as_ref().try_into()?); + Ok(concat_batches(&schema_ref, &res)?) +} + #[cfg(test)] mod tests { - use std::path::PathBuf; + use std::{future::Future, path::PathBuf, pin::Pin}; + + use delta_kernel::Table; + use deltalake_test::utils::*; + + use super::*; pub(super) fn get_dat_dir() -> PathBuf { let d = PathBuf::from(env!("CARGO_MANIFEST_DIR")); @@ -242,4 +346,99 @@ mod tests { rep_root.push("dat/out/reader_tests/generated"); rep_root } + + fn get_lazy( + ctx: &IntegrationContext, + table: TestTables, + version: Option, + ) -> TestResult>>>>> { + let store = ctx.table_builder(table).build_storage()?.object_store(None); + let table = Table::try_from_uri("memory:///")?; + Ok(Box::pin(async move { + Ok(Box::new(LazySnapshot::try_new(table, store, version).await?) as Box) + })) + } + + fn get_eager( + ctx: &IntegrationContext, + table: TestTables, + version: Option, + ) -> TestResult>>>>> { + let store = ctx.table_builder(table).build_storage()?.object_store(None); + let config = Default::default(); + Ok(Box::pin(async move { + Ok( + Box::new(EagerSnapshot::try_new("memory:///", store, config, version).await?) + as Box, + ) + })) + } + + #[tokio::test] + async fn test_snapshots() -> TestResult { + let context = IntegrationContext::new(Box::::default())?; + context.load_table(TestTables::Checkpoints).await?; + context.load_table(TestTables::Simple).await?; + context.load_table(TestTables::SimpleWithCheckpoint).await?; + context.load_table(TestTables::WithDvSmall).await?; + + test_snapshot(&context, get_lazy).await?; + test_snapshot(&context, get_eager).await?; + + Ok(()) + } + + // NOTE: test needs to be async, so that we can pick up the runtime from the context + async fn test_snapshot(ctx: &IntegrationContext, get_snapshot: F) -> TestResult<()> + where + F: Fn( + &IntegrationContext, + TestTables, + Option, + ) -> TestResult>>>>>, + { + for version in 0..=12 { + let snapshot = get_snapshot(ctx, TestTables::Checkpoints, Some(version))?.await?; + assert_eq!(snapshot.version(), version); + + test_files(snapshot.as_ref())?; + test_files_view(snapshot.as_ref())?; + test_commit_infos(snapshot.as_ref())?; + } + + let mut snapshot = get_snapshot(ctx, TestTables::Checkpoints, Some(0))?.await?; + for version in 1..=12 { + snapshot.update(Some(version))?; + assert_eq!(snapshot.version(), version); + + test_files(snapshot.as_ref())?; + test_files_view(snapshot.as_ref())?; + test_commit_infos(snapshot.as_ref())?; + } + + Ok(()) + } + + fn test_files(snapshot: &dyn Snapshot) -> TestResult<()> { + let batches = snapshot.files(None)?.collect::, _>>()?; + let num_files = batches.iter().map(|b| b.num_rows() as i64).sum::(); + assert_eq!((num_files as u64), snapshot.version()); + Ok(()) + } + + fn test_commit_infos(snapshot: &dyn Snapshot) -> TestResult<()> { + let commit_infos = snapshot.commit_infos(None, Some(100))?.collect::>(); + assert_eq!((commit_infos.len() as u64), snapshot.version() + 1); + assert_eq!(commit_infos.first().unwrap().0, snapshot.version()); + Ok(()) + } + + fn test_files_view(snapshot: &dyn Snapshot) -> TestResult<()> { + let num_files_view = snapshot + .files_view(None)? + .map(|f| f.unwrap().path().to_string()) + .count() as u64; + assert_eq!(num_files_view, snapshot.version()); + Ok(()) + } } From e83c3caaf68aae00d27d719c0005b681c9f8a492 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Tue, 21 Jan 2025 17:28:00 +0100 Subject: [PATCH 11/23] test: more snapshot tests Signed-off-by: Robert Pack --- Cargo.toml | 10 +-- crates/core/src/kernel/snapshot_next/eager.rs | 37 +++++++++-- .../src/kernel/snapshot_next/iterators.rs | 66 ++++++++++++++++++- crates/core/src/kernel/snapshot_next/lazy.rs | 2 +- crates/core/src/kernel/snapshot_next/mod.rs | 55 ++++++++++++---- crates/core/src/operations/transaction/mod.rs | 30 +++++++-- 6 files changed, 171 insertions(+), 29 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 0bbe1e07ab..00c970fe8e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,14 +27,14 @@ debug = "line-tables-only" [workspace.dependencies] # delta_kernel = { version = "=0.6.0", features = ["default-engine"] } -delta_kernel = { path = "../delta-kernel-rs/kernel", features = [ - "default-engine", - "developer-visibility", -] } -# delta_kernel = { git = "https://github.com/roeap/delta-kernel-rs", rev = "2e09bfcc0447283a3acc320ad2350f4075dba83e", features = [ +# delta_kernel = { path = "../delta-kernel-rs/kernel", features = [ # "default-engine", # "developer-visibility", # ] } +delta_kernel = { git = "https://github.com/roeap/delta-kernel-rs", rev = "50c1c023b7e9d60df69f6e592b91e4cc06a5a0b1", features = [ + "default-engine", + "developer-visibility", +] } # arrow arrow = { version = "53" } diff --git a/crates/core/src/kernel/snapshot_next/eager.rs b/crates/core/src/kernel/snapshot_next/eager.rs index 83e0a00863..2e8ff24059 100644 --- a/crates/core/src/kernel/snapshot_next/eager.rs +++ b/crates/core/src/kernel/snapshot_next/eager.rs @@ -1,14 +1,16 @@ use std::sync::Arc; -use arrow_array::RecordBatch; +use arrow_array::{BooleanArray, RecordBatch}; +use arrow_select::filter::filter_record_batch; use delta_kernel::actions::set_transaction::SetTransactionMap; use delta_kernel::actions::{get_log_add_schema, get_log_schema, ADD_NAME, REMOVE_NAME}; use delta_kernel::actions::{Add, Metadata, Protocol, SetTransaction}; use delta_kernel::engine::arrow_data::ArrowEngineData; use delta_kernel::log_segment::LogSegment; +use delta_kernel::scan::log_replay::scan_action_iter; use delta_kernel::schema::Schema; use delta_kernel::table_properties::TableProperties; -use delta_kernel::{ExpressionRef, Table, Version}; +use delta_kernel::{EngineData, ExpressionRef, Table, Version}; use itertools::Itertools; use object_store::ObjectStore; use url::Url; @@ -55,9 +57,36 @@ impl Snapshot for EagerSnapshot { fn logical_files( &self, - _predicate: Option, + predicate: Option, ) -> DeltaResult>>> { - todo!() + let scan = self + .snapshot + .inner + .as_ref() + .clone() + .into_scan_builder() + .with_predicate(predicate) + .build()?; + + let iter = scan_action_iter( + self.snapshot.engine_ref().as_ref(), + vec![Ok(( + Box::new(ArrowEngineData::new(self.file_data()?.clone())) as Box, + false, + ))] + .into_iter(), + scan.physical_predicate() + .map(|p| (p, scan.schema().clone())), + ) + .map(|res| { + res.and_then(|(data, predicate)| { + let batch: RecordBatch = ArrowEngineData::try_from_engine_data(data)?.into(); + Ok(filter_record_batch(&batch, &BooleanArray::from(predicate))?) + }) + }) + .map(|batch| batch.map_err(|e| e.into())); + + Ok(Box::new(iter)) } fn files( diff --git a/crates/core/src/kernel/snapshot_next/iterators.rs b/crates/core/src/kernel/snapshot_next/iterators.rs index 38aa0e1c2f..c353881e2c 100644 --- a/crates/core/src/kernel/snapshot_next/iterators.rs +++ b/crates/core/src/kernel/snapshot_next/iterators.rs @@ -1,5 +1,5 @@ use std::collections::HashSet; -use std::sync::Arc; +use std::sync::{Arc, LazyLock}; use arrow_array::cast::AsArray; use arrow_array::types::Int64Type; @@ -14,6 +14,7 @@ use delta_kernel::engine::arrow_data::ArrowEngineData; use delta_kernel::engine::arrow_expression::ProvidesColumnByName; use delta_kernel::engine_data::{GetData, RowVisitor}; use delta_kernel::expressions::{Scalar, StructData}; +use delta_kernel::scan::scan_row_schema; use crate::kernel::scalars::ScalarExt; use crate::{DeltaResult, DeltaTableError}; @@ -218,6 +219,69 @@ impl Iterator for LogicalFileView { } } +pub struct LogicalFileViewIterator +where + I: IntoIterator>, +{ + inner: I::IntoIter, + batch: Option, + current: usize, +} + +impl LogicalFileViewIterator +where + I: IntoIterator>, +{ + /// Create a new [LogicalFileViewIterator]. + /// + /// If `iter` is an infallible iterator, use `.map(Ok)`. + pub fn new(iter: I) -> Self { + Self { + inner: iter.into_iter(), + batch: None, + current: 0, + } + } +} + +// impl Iterator for LogicalFileViewIterator +// where +// I: IntoIterator>, +// { +// type Item = DeltaResult; +// +// fn next(&mut self) -> Option { +// if let Some(batch) = &self.batch { +// if self.current < batch.num_rows() { +// let item = LogicalFileView { +// files: batch.clone(), +// index: self.current, +// }; +// self.current += 1; +// return Some(Ok(item)); +// } +// } +// match self.inner.next() { +// Some(Ok(batch)) => { +// if validate_logical_file(&batch).is_err() { +// return Some(Err(DeltaTableError::generic( +// "Invalid logical file data encountered.", +// ))); +// } +// self.batch = Some(batch); +// self.current = 0; +// self.next() +// } +// Some(Err(e)) => Some(Err(e)), +// None => None, +// } +// } +// +// fn size_hint(&self) -> (usize, Option) { +// self.inner.size_hint() +// } +// } + pub struct AddViewIterator where I: IntoIterator>, diff --git a/crates/core/src/kernel/snapshot_next/lazy.rs b/crates/core/src/kernel/snapshot_next/lazy.rs index 572b4ead37..20ccfb7031 100644 --- a/crates/core/src/kernel/snapshot_next/lazy.rs +++ b/crates/core/src/kernel/snapshot_next/lazy.rs @@ -3,8 +3,8 @@ use std::io::{BufRead, BufReader, Cursor}; use std::sync::{Arc, LazyLock}; -use arrow::compute::filter_record_batch; use arrow_array::{BooleanArray, RecordBatch}; +use arrow_select::filter::filter_record_batch; use delta_kernel::actions::set_transaction::{SetTransactionMap, SetTransactionScanner}; use delta_kernel::actions::{get_log_schema, REMOVE_NAME}; use delta_kernel::actions::{Metadata, Protocol, SetTransaction}; diff --git a/crates/core/src/kernel/snapshot_next/mod.rs b/crates/core/src/kernel/snapshot_next/mod.rs index 817d94e477..0023f4236d 100644 --- a/crates/core/src/kernel/snapshot_next/mod.rs +++ b/crates/core/src/kernel/snapshot_next/mod.rs @@ -13,6 +13,7 @@ use delta_kernel::engine::arrow_data::ArrowEngineData; use delta_kernel::engine::arrow_expression::apply_schema; use delta_kernel::expressions::{Scalar, StructData}; use delta_kernel::scan::log_replay::scan_action_iter; +use delta_kernel::scan::scan_row_schema; use delta_kernel::schema::{DataType, Schema}; use delta_kernel::table_properties::TableProperties; use delta_kernel::{EngineData, ExpressionRef, Version}; @@ -92,19 +93,34 @@ pub trait Snapshot { /// Get the [`TableProperties`] for this [`Snapshot`]. fn table_properties(&self) -> &TableProperties; - /// Get all currently active files in the table. + fn logical_file_schema(&self) -> &'static Schema { + scan_row_schema() + } + + /// Get all logical files present in the current snapshot. /// /// # Parameters /// - `predicate`: An optional predicate to filter the files based on file statistics. /// /// # Returns - /// An iterator of [`RecordBatch`]es, where each batch contains add action data. - fn files( + /// An iterator of [`RecordBatch`]es, where each batch contains logical file data. + fn logical_files( &self, predicate: Option, ) -> DeltaResult>>>; - fn logical_files( + /// Get all currently active files in the table. + /// + /// # Parameters + /// - `predicate`: An optional predicate to filter the files based on file statistics. + /// + /// # Returns + /// An iterator of [`RecordBatch`]es, where each batch contains add action data. + #[deprecated( + since = "0.25.0", + note = "Use `logical_files` instead, which returns a more focussed dataset and avoids computational overhead." + )] + fn files( &self, predicate: Option, ) -> DeltaResult>>>; @@ -113,6 +129,7 @@ pub trait Snapshot { &self, predicate: Option, ) -> DeltaResult>>> { + #[allow(deprecated)] Ok(Box::new(AddViewIterator::new(self.files(predicate)?))) } @@ -216,6 +233,7 @@ impl Snapshot for Box { &self, predicate: Option, ) -> DeltaResult>>> { + #[allow(deprecated)] self.as_ref().files(predicate) } @@ -404,6 +422,7 @@ mod tests { test_files(snapshot.as_ref())?; test_files_view(snapshot.as_ref())?; test_commit_infos(snapshot.as_ref())?; + test_logical_files(snapshot.as_ref())?; } let mut snapshot = get_snapshot(ctx, TestTables::Checkpoints, Some(0))?.await?; @@ -414,22 +433,29 @@ mod tests { test_files(snapshot.as_ref())?; test_files_view(snapshot.as_ref())?; test_commit_infos(snapshot.as_ref())?; + test_logical_files(snapshot.as_ref())?; } Ok(()) } - fn test_files(snapshot: &dyn Snapshot) -> TestResult<()> { - let batches = snapshot.files(None)?.collect::, _>>()?; - let num_files = batches.iter().map(|b| b.num_rows() as i64).sum::(); + fn test_logical_files(snapshot: &dyn Snapshot) -> TestResult<()> { + let logical_files = snapshot + .logical_files(None)? + .collect::, _>>()?; + let num_files = logical_files + .iter() + .map(|b| b.num_rows() as i64) + .sum::(); assert_eq!((num_files as u64), snapshot.version()); Ok(()) } - fn test_commit_infos(snapshot: &dyn Snapshot) -> TestResult<()> { - let commit_infos = snapshot.commit_infos(None, Some(100))?.collect::>(); - assert_eq!((commit_infos.len() as u64), snapshot.version() + 1); - assert_eq!(commit_infos.first().unwrap().0, snapshot.version()); + fn test_files(snapshot: &dyn Snapshot) -> TestResult<()> { + #[allow(deprecated)] + let batches = snapshot.files(None)?.collect::, _>>()?; + let num_files = batches.iter().map(|b| b.num_rows() as i64).sum::(); + assert_eq!((num_files as u64), snapshot.version()); Ok(()) } @@ -441,4 +467,11 @@ mod tests { assert_eq!(num_files_view, snapshot.version()); Ok(()) } + + fn test_commit_infos(snapshot: &dyn Snapshot) -> TestResult<()> { + let commit_infos = snapshot.commit_infos(None, Some(100))?.collect::>(); + assert_eq!((commit_infos.len() as u64), snapshot.version() + 1); + assert_eq!(commit_infos.first().unwrap().0, snapshot.version()); + Ok(()) + } } diff --git a/crates/core/src/operations/transaction/mod.rs b/crates/core/src/operations/transaction/mod.rs index bb173c40b7..d7d99f0dbe 100644 --- a/crates/core/src/operations/transaction/mod.rs +++ b/crates/core/src/operations/transaction/mod.rs @@ -80,6 +80,7 @@ use bytes::Bytes; use chrono::Utc; use conflict_checker::ConflictChecker; use futures::future::BoxFuture; +use itertools::Itertools; use object_store::path::Path; use object_store::Error as ObjectStoreError; use serde_json::Value; @@ -268,22 +269,37 @@ pub struct CommitData { impl CommitData { /// Create new data to be committed pub fn new( - mut actions: Vec, + actions: Vec, operation: DeltaOperation, mut app_metadata: HashMap, app_transactions: Vec, ) -> Self { - if !actions.iter().any(|a| matches!(a, Action::CommitInfo(..))) { - let mut commit_info = operation.get_commit_info(); - commit_info.timestamp = Some(Utc::now().timestamp_millis()); + // When in-commit-timestamps are enabled, we need to ensure that the commit info is the first action + // in the commit log. If it is not present, we need to add it. + // https://github.com/delta-io/delta/blob/master/PROTOCOL.md#writer-requirements-for-in-commit-timestamps + let mut commit_info = None::; + let mut actions = actions + .into_iter() + .inspect(|a| { + if matches!(a, Action::CommitInfo(..)) { + commit_info = Some(a.clone()) + } + }) + .filter(|a| matches!(a, Action::CommitInfo(..))) + .collect_vec(); + if !commit_info.is_some() { + let mut cm = operation.get_commit_info(); + cm.timestamp = Some(Utc::now().timestamp_millis()); app_metadata.insert( "clientVersion".to_string(), Value::String(format!("delta-rs.{}", crate_version())), ); - app_metadata.extend(commit_info.info); - commit_info.info = app_metadata.clone(); - actions.push(Action::CommitInfo(commit_info)) + app_metadata.extend(cm.info); + cm.info = app_metadata.clone(); + commit_info = Some(Action::CommitInfo(cm)); } + // safety: we assured commit_info is Some just above. + actions.insert(0, commit_info.unwrap()); for txn in &app_transactions { actions.push(Action::Txn(txn.clone())) From 5364f4a7e2141c947e76f668725e1e5124dd3106 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Tue, 21 Jan 2025 18:58:39 +0100 Subject: [PATCH 12/23] feat: allow iterating over logical files Signed-off-by: Robert Pack --- .../src/kernel/snapshot_next/iterators.rs | 103 ++++++++---------- crates/core/src/kernel/snapshot_next/mod.rs | 28 ++++- 2 files changed, 74 insertions(+), 57 deletions(-) diff --git a/crates/core/src/kernel/snapshot_next/iterators.rs b/crates/core/src/kernel/snapshot_next/iterators.rs index c353881e2c..1bfec67eec 100644 --- a/crates/core/src/kernel/snapshot_next/iterators.rs +++ b/crates/core/src/kernel/snapshot_next/iterators.rs @@ -1,5 +1,5 @@ use std::collections::HashSet; -use std::sync::{Arc, LazyLock}; +use std::sync::Arc; use arrow_array::cast::AsArray; use arrow_array::types::Int64Type; @@ -14,7 +14,6 @@ use delta_kernel::engine::arrow_data::ArrowEngineData; use delta_kernel::engine::arrow_expression::ProvidesColumnByName; use delta_kernel::engine_data::{GetData, RowVisitor}; use delta_kernel::expressions::{Scalar, StructData}; -use delta_kernel::scan::scan_row_schema; use crate::kernel::scalars::ScalarExt; use crate::{DeltaResult, DeltaTableError}; @@ -202,23 +201,6 @@ impl LogicalFileView { } } -impl Iterator for LogicalFileView { - type Item = LogicalFileView; - - fn next(&mut self) -> Option { - if self.index < self.files.num_rows() { - let file = LogicalFileView { - files: self.files.clone(), - index: self.index, - }; - self.index += 1; - Some(file) - } else { - None - } - } -} - pub struct LogicalFileViewIterator where I: IntoIterator>, @@ -244,43 +226,43 @@ where } } -// impl Iterator for LogicalFileViewIterator -// where -// I: IntoIterator>, -// { -// type Item = DeltaResult; -// -// fn next(&mut self) -> Option { -// if let Some(batch) = &self.batch { -// if self.current < batch.num_rows() { -// let item = LogicalFileView { -// files: batch.clone(), -// index: self.current, -// }; -// self.current += 1; -// return Some(Ok(item)); -// } -// } -// match self.inner.next() { -// Some(Ok(batch)) => { -// if validate_logical_file(&batch).is_err() { -// return Some(Err(DeltaTableError::generic( -// "Invalid logical file data encountered.", -// ))); -// } -// self.batch = Some(batch); -// self.current = 0; -// self.next() -// } -// Some(Err(e)) => Some(Err(e)), -// None => None, -// } -// } -// -// fn size_hint(&self) -> (usize, Option) { -// self.inner.size_hint() -// } -// } +impl Iterator for LogicalFileViewIterator +where + I: IntoIterator>, +{ + type Item = DeltaResult; + + fn next(&mut self) -> Option { + if let Some(batch) = &self.batch { + if self.current < batch.num_rows() { + let item = LogicalFileView { + files: batch.clone(), + index: self.current, + }; + self.current += 1; + return Some(Ok(item)); + } + } + match self.inner.next() { + Some(Ok(batch)) => { + if validate_logical_file(&batch).is_err() { + return Some(Err(DeltaTableError::generic( + "Invalid logical file data encountered.", + ))); + } + self.batch = Some(batch); + self.current = 0; + self.next() + } + Some(Err(e)) => Some(Err(e)), + None => None, + } + } + + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } +} pub struct AddViewIterator where @@ -353,6 +335,15 @@ pub(crate) fn validate_add(batch: &RecordBatch) -> DeltaResult<()> { Ok(()) } +fn validate_logical_file(batch: &RecordBatch) -> DeltaResult<()> { + validate_column::(batch, &["path"])?; + validate_column::(batch, &["size"])?; + validate_column::(batch, &["modificationTime"])?; + // validate_column::(batch, &["deletionVector"])?; + // validate_column::(batch, &["fileConstantValues"])?; + Ok(()) +} + fn validate_column<'a, T: Array + 'static>( actions: &'a RecordBatch, col: &'a [impl AsRef], diff --git a/crates/core/src/kernel/snapshot_next/mod.rs b/crates/core/src/kernel/snapshot_next/mod.rs index 0023f4236d..a415f71a99 100644 --- a/crates/core/src/kernel/snapshot_next/mod.rs +++ b/crates/core/src/kernel/snapshot_next/mod.rs @@ -17,7 +17,7 @@ use delta_kernel::scan::scan_row_schema; use delta_kernel::schema::{DataType, Schema}; use delta_kernel::table_properties::TableProperties; use delta_kernel::{EngineData, ExpressionRef, Version}; -use iterators::{AddView, AddViewIterator}; +use iterators::{AddView, AddViewIterator, LogicalFileView, LogicalFileViewIterator}; use itertools::Itertools; use url::Url; @@ -109,6 +109,16 @@ pub trait Snapshot { predicate: Option, ) -> DeltaResult>>>; + fn logical_files_view( + &self, + predicate: Option, + ) -> DeltaResult>>> { + #[allow(deprecated)] + Ok(Box::new(LogicalFileViewIterator::new( + self.logical_files(predicate)?, + ))) + } + /// Get all currently active files in the table. /// /// # Parameters @@ -125,6 +135,10 @@ pub trait Snapshot { predicate: Option, ) -> DeltaResult>>>; + #[deprecated( + since = "0.25.0", + note = "Use `logical_files_view` instead, which returns a more focussed dataset and avoids computational overhead." + )] fn files_view( &self, predicate: Option, @@ -423,6 +437,7 @@ mod tests { test_files_view(snapshot.as_ref())?; test_commit_infos(snapshot.as_ref())?; test_logical_files(snapshot.as_ref())?; + test_logical_files_view(snapshot.as_ref())?; } let mut snapshot = get_snapshot(ctx, TestTables::Checkpoints, Some(0))?.await?; @@ -434,6 +449,7 @@ mod tests { test_files_view(snapshot.as_ref())?; test_commit_infos(snapshot.as_ref())?; test_logical_files(snapshot.as_ref())?; + test_logical_files_view(snapshot.as_ref())?; } Ok(()) @@ -451,6 +467,15 @@ mod tests { Ok(()) } + fn test_logical_files_view(snapshot: &dyn Snapshot) -> TestResult<()> { + let num_files_view = snapshot + .logical_files_view(None)? + .map(|f| f.unwrap().path().to_string()) + .count() as u64; + assert_eq!(num_files_view, snapshot.version()); + Ok(()) + } + fn test_files(snapshot: &dyn Snapshot) -> TestResult<()> { #[allow(deprecated)] let batches = snapshot.files(None)?.collect::, _>>()?; @@ -460,6 +485,7 @@ mod tests { } fn test_files_view(snapshot: &dyn Snapshot) -> TestResult<()> { + #[allow(deprecated)] let num_files_view = snapshot .files_view(None)? .map(|f| f.unwrap().path().to_string()) From 51349f46a93c287299cc0517d04adb3dac3c820c Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Tue, 21 Jan 2025 20:08:31 +0100 Subject: [PATCH 13/23] fix: revert accidentally commited file Signed-off-by: Robert Pack --- crates/core/src/operations/transaction/mod.rs | 30 +++++-------------- 1 file changed, 7 insertions(+), 23 deletions(-) diff --git a/crates/core/src/operations/transaction/mod.rs b/crates/core/src/operations/transaction/mod.rs index d7d99f0dbe..bb173c40b7 100644 --- a/crates/core/src/operations/transaction/mod.rs +++ b/crates/core/src/operations/transaction/mod.rs @@ -80,7 +80,6 @@ use bytes::Bytes; use chrono::Utc; use conflict_checker::ConflictChecker; use futures::future::BoxFuture; -use itertools::Itertools; use object_store::path::Path; use object_store::Error as ObjectStoreError; use serde_json::Value; @@ -269,37 +268,22 @@ pub struct CommitData { impl CommitData { /// Create new data to be committed pub fn new( - actions: Vec, + mut actions: Vec, operation: DeltaOperation, mut app_metadata: HashMap, app_transactions: Vec, ) -> Self { - // When in-commit-timestamps are enabled, we need to ensure that the commit info is the first action - // in the commit log. If it is not present, we need to add it. - // https://github.com/delta-io/delta/blob/master/PROTOCOL.md#writer-requirements-for-in-commit-timestamps - let mut commit_info = None::; - let mut actions = actions - .into_iter() - .inspect(|a| { - if matches!(a, Action::CommitInfo(..)) { - commit_info = Some(a.clone()) - } - }) - .filter(|a| matches!(a, Action::CommitInfo(..))) - .collect_vec(); - if !commit_info.is_some() { - let mut cm = operation.get_commit_info(); - cm.timestamp = Some(Utc::now().timestamp_millis()); + if !actions.iter().any(|a| matches!(a, Action::CommitInfo(..))) { + let mut commit_info = operation.get_commit_info(); + commit_info.timestamp = Some(Utc::now().timestamp_millis()); app_metadata.insert( "clientVersion".to_string(), Value::String(format!("delta-rs.{}", crate_version())), ); - app_metadata.extend(cm.info); - cm.info = app_metadata.clone(); - commit_info = Some(Action::CommitInfo(cm)); + app_metadata.extend(commit_info.info); + commit_info.info = app_metadata.clone(); + actions.push(Action::CommitInfo(commit_info)) } - // safety: we assured commit_info is Some just above. - actions.insert(0, commit_info.unwrap()); for txn in &app_transactions { actions.push(Action::Txn(txn.clone())) From 5d2cf488ff7f69393b1e3c73bf72f1dcdd832768 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Tue, 21 Jan 2025 21:55:18 +0100 Subject: [PATCH 14/23] fix: tombstone replay Signed-off-by: Robert Pack --- .gitignore | 1 - crates/core/src/kernel/snapshot_next/lazy.rs | 93 ++++++++++++-------- crates/core/src/kernel/snapshot_next/mod.rs | 13 +-- 3 files changed, 58 insertions(+), 49 deletions(-) diff --git a/.gitignore b/.gitignore index ee7ca99235..c5aca6465b 100644 --- a/.gitignore +++ b/.gitignore @@ -22,7 +22,6 @@ __blobstorage__ .githubchangeloggenerator.cache.log .githubchangeloggenerator.cache/ .githubchangeloggenerator* -data .zed/ # Add all Cargo.lock files except for those in binary crates diff --git a/crates/core/src/kernel/snapshot_next/lazy.rs b/crates/core/src/kernel/snapshot_next/lazy.rs index 20ccfb7031..3a203e1ad1 100644 --- a/crates/core/src/kernel/snapshot_next/lazy.rs +++ b/crates/core/src/kernel/snapshot_next/lazy.rs @@ -9,15 +9,16 @@ use delta_kernel::actions::set_transaction::{SetTransactionMap, SetTransactionSc use delta_kernel::actions::{get_log_schema, REMOVE_NAME}; use delta_kernel::actions::{Metadata, Protocol, SetTransaction}; use delta_kernel::engine::arrow_data::ArrowEngineData; +use delta_kernel::engine::arrow_expression::evaluate_expression; use delta_kernel::engine::default::executor::tokio::{ TokioBackgroundExecutor, TokioMultiThreadExecutor, }; use delta_kernel::engine::default::DefaultEngine; use delta_kernel::log_segment::LogSegment; -use delta_kernel::schema::Schema; +use delta_kernel::schema::{DataType, Schema}; use delta_kernel::snapshot::Snapshot as SnapshotInner; use delta_kernel::table_properties::TableProperties; -use delta_kernel::{Engine, Expression, ExpressionRef, Table, Version}; +use delta_kernel::{Engine, Expression, ExpressionHandler, ExpressionRef, Table, Version}; use itertools::Itertools; use object_store::path::Path; use object_store::ObjectStore; @@ -25,7 +26,7 @@ use url::Url; use super::cache::CommitCacheObjectStore; use super::{replay_file_actions, Snapshot}; -use crate::kernel::{Action, CommitInfo}; +use crate::kernel::{Action, CommitInfo, ARROW_HANDLER}; use crate::{DeltaResult, DeltaTableError}; // TODO: avoid repetitive parsing of json stats @@ -94,11 +95,8 @@ impl Snapshot for LazySnapshot { } fn tombstones(&self) -> DeltaResult>>> { - static META_PREDICATE: LazyLock> = LazyLock::new(|| { - Some(Arc::new( - Expression::column([REMOVE_NAME, "path"]).is_not_null(), - )) - }); + static META_PREDICATE: LazyLock = + LazyLock::new(|| Arc::new(Expression::column([REMOVE_NAME, "path"]).is_not_null())); let read_schema = get_log_schema().project(&[REMOVE_NAME])?; Ok(Box::new( self.inner @@ -107,9 +105,23 @@ impl Snapshot for LazySnapshot { self.engine.as_ref(), read_schema.clone(), read_schema, - META_PREDICATE.clone(), + Some(META_PREDICATE.clone()), )? - .map_ok(|(d, _)| Ok(RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?))) + .map_ok(|(d, _)| { + let batch = RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?); + let selection = evaluate_expression( + META_PREDICATE.as_ref(), + &batch, + Some(&DataType::BOOLEAN), + )?; + let filter = selection + .as_any() + .downcast_ref::() + .ok_or_else(|| { + DeltaTableError::generic("failed to downcast to BooleanArray") + })?; + Ok(filter_record_batch(&batch, filter)?) + }) .flatten(), )) } @@ -247,37 +259,46 @@ impl LazySnapshot { #[cfg(test)] mod tests { - use deltalake_test::acceptance::{read_dat_case, TestCaseInfo}; + use delta_kernel::schema::StructType; + use deltalake_test::utils::*; use deltalake_test::TestResult; - use super::super::tests::get_dat_dir; use super::*; async fn load_snapshot() -> TestResult<()> { - // some comment - let mut dat_dir = get_dat_dir(); - dat_dir.push("multi_partitioned"); - - let dat_info: TestCaseInfo = read_dat_case(dat_dir)?; - let table_info = dat_info.table_summary()?; - - let table = Table::try_from_uri(dat_info.table_root()?)?; - - let snapshot = LazySnapshot::try_new( - table, - Arc::new(object_store::local::LocalFileSystem::default()), - None, - ) - .await?; - - assert_eq!(snapshot.version(), table_info.version); - assert_eq!( - ( - snapshot.protocol().min_reader_version(), - snapshot.protocol().min_writer_version() - ), - (table_info.min_reader_version, table_info.min_writer_version) - ); + let ctx = IntegrationContext::new(Box::::default())?; + ctx.load_table(TestTables::Simple).await?; + + let store = ctx + .table_builder(TestTables::Simple) + .build_storage()? + .object_store(None); + let table = Table::try_from_uri("memory:///")?; + let snapshot = LazySnapshot::try_new(table, store, None).await?; + + let schema_string = r#"{"type":"struct","fields":[{"name":"id","type":"long","nullable":true,"metadata":{}}]}"#; + let expected: StructType = serde_json::from_str(schema_string)?; + assert_eq!(snapshot.schema(), &expected); + + let infos = snapshot.commit_infos(None, None)?.collect_vec(); + assert_eq!(infos.len(), 5); + + let tombstones: Vec<_> = snapshot.tombstones()?.try_collect()?; + let num_tombstones = tombstones.iter().map(|b| b.num_rows() as i64).sum::(); + assert_eq!(num_tombstones, 31); + + let expected = vec![ + "part-00000-2befed33-c358-4768-a43c-3eda0d2a499d-c000.snappy.parquet", + "part-00000-c1777d7d-89d9-4790-b38a-6ee7e24456b1-c000.snappy.parquet", + "part-00001-7891c33d-cedc-47c3-88a6-abcfb049d3b4-c000.snappy.parquet", + "part-00004-315835fe-fb44-4562-98f6-5e6cfa3ae45d-c000.snappy.parquet", + "part-00007-3a0e4727-de0d-41b6-81ef-5223cf40f025-c000.snappy.parquet", + ]; + let file_names: Vec<_> = snapshot + .logical_files_view(None)? + .map_ok(|f| f.path().to_owned()) + .try_collect()?; + assert_eq!(file_names, expected); Ok(()) } diff --git a/crates/core/src/kernel/snapshot_next/mod.rs b/crates/core/src/kernel/snapshot_next/mod.rs index a415f71a99..079ceb0298 100644 --- a/crates/core/src/kernel/snapshot_next/mod.rs +++ b/crates/core/src/kernel/snapshot_next/mod.rs @@ -361,24 +361,13 @@ fn scan_as_log_data( #[cfg(test)] mod tests { - use std::{future::Future, path::PathBuf, pin::Pin}; + use std::{future::Future, pin::Pin}; use delta_kernel::Table; use deltalake_test::utils::*; use super::*; - pub(super) fn get_dat_dir() -> PathBuf { - let d = PathBuf::from(env!("CARGO_MANIFEST_DIR")); - let mut rep_root = d - .parent() - .and_then(|p| p.parent()) - .expect("valid directory") - .to_path_buf(); - rep_root.push("dat/out/reader_tests/generated"); - rep_root - } - fn get_lazy( ctx: &IntegrationContext, table: TestTables, From 9e5f1fbf94b133a56e20f5c28229db27c5f37a5b Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Tue, 8 Apr 2025 19:23:58 +0200 Subject: [PATCH 15/23] fix: handle unknown features Signed-off-by: Robert Pack --- Cargo.toml | 10 ++++-- crates/core/src/kernel/models/actions.rs | 30 +++++++++++++++++- .../core/src/kernel/snapshot/log_segment.rs | 20 ++++++++++++ crates/core/src/kernel/snapshot/mod.rs | 5 +++ crates/core/src/operations/create.rs | 7 +--- .../src/operations/transaction/protocol.rs | 19 +++++++---- crates/core/src/storage/mod.rs | 4 +++ .../_delta_log/00000000000000000000.json | 3 ++ .../_delta_log/00000000000000000001.json | 11 +++++++ .../_delta_log/00000000000000000002.json | 11 +++++++ .../_delta_log/00000000000000000003.json | 11 +++++++ .../_delta_log/00000000000000000004.json | 11 +++++++ .../_delta_log/00000000000000000005.json | 11 +++++++ .../_delta_log/00000000000000000006.json | 11 +++++++ .../_delta_log/00000000000000000007.json | 11 +++++++ .../_delta_log/00000000000000000008.json | 11 +++++++ .../_delta_log/00000000000000000009.json | 11 +++++++ .../00000000000000000010.checkpoint.parquet | Bin 0 -> 21615 bytes .../_delta_log/00000000000000000010.json | 11 +++++++ .../_delta_log/00000000000000000011.json | 11 +++++++ .../_delta_log/00000000000000000012.json | 11 +++++++ .../_delta_log/00000000000000000013.json | 11 +++++++ .../table_a/_delta_log/_last_checkpoint | 1 + .../_delta_log/00000000000000000000.json | 3 ++ .../_delta_log/00000000000000000001.json | 11 +++++++ .../_delta_log/00000000000000000002.json | 11 +++++++ .../_delta_log/00000000000000000003.json | 11 +++++++ .../_delta_log/00000000000000000004.json | 11 +++++++ .../_delta_log/00000000000000000005.json | 11 +++++++ .../_delta_log/00000000000000000006.json | 11 +++++++ .../_delta_log/00000000000000000008.json | 11 +++++++ .../_delta_log/00000000000000000009.json | 11 +++++++ .../00000000000000000010.checkpoint.parquet | Bin 0 -> 21615 bytes .../_delta_log/00000000000000000010.json | 11 +++++++ .../_delta_log/00000000000000000011.json | 11 +++++++ .../_delta_log/00000000000000000012.json | 11 +++++++ .../_delta_log/00000000000000000013.json | 11 +++++++ .../table_b/_delta_log/_last_checkpoint | 1 + .../_delta_log/00000000000000000000.json | 3 ++ .../_delta_log/00000000000000000001.json | 11 +++++++ .../_delta_log/00000000000000000002.json | 11 +++++++ .../_delta_log/00000000000000000003.json | 11 +++++++ .../_delta_log/00000000000000000004.json | 11 +++++++ .../_delta_log/00000000000000000005.json | 11 +++++++ .../_delta_log/00000000000000000006.json | 11 +++++++ .../_delta_log/00000000000000000007.json | 11 +++++++ .../_delta_log/00000000000000000008.json | 11 +++++++ .../_delta_log/00000000000000000009.json | 11 +++++++ .../00000000000000000010.checkpoint.parquet | Bin 0 -> 21615 bytes .../_delta_log/00000000000000000010.json | 11 +++++++ .../_delta_log/00000000000000000011.json | 11 +++++++ .../_delta_log/00000000000000000013.json | 11 +++++++ .../table_c/_delta_log/_last_checkpoint | 1 + .../00000000000000000000.checkpoint.parquet | Bin 0 -> 10569 bytes .../_delta_log/00000000000000000000.json | 3 ++ .../table_d/_delta_log/_last_checkpoint | 1 + .../00000000000000000000.checkpoint.parquet | Bin 0 -> 11644 bytes .../_delta_log/00000000000000000000.json | 4 +++ .../table_e/_delta_log/_last_checkpoint | 1 + .../_delta_log/00000000000000000000.json | 4 +++ .../_delta_log/00000000000000000001.json | 2 ++ .../_delta_log/00000000000000000002.json | 2 ++ .../_delta_log/00000000000000000003.json | 2 ++ .../_delta_log/00000000000000000004.json | 2 ++ .../_delta_log/00000000000000000005.json | 2 ++ .../_delta_log/00000000000000000006.json | 2 ++ .../_delta_log/00000000000000000007.json | 2 ++ .../_delta_log/00000000000000000008.json | 2 ++ .../_delta_log/00000000000000000009.json | 2 ++ .../00000000000000000010.checkpoint.parquet | Bin 0 -> 12260 bytes .../_delta_log/00000000000000000010.json | 2 ++ .../_delta_log/00000000000000000011.json | 2 ++ .../_delta_log/00000000000000000012.json | 2 ++ .../_delta_log/00000000000000000013.json | 2 ++ .../table_f/_delta_log/_last_checkpoint | 1 + .../_delta_log/00000000000000000000.json | 3 ++ .../_delta_log/00000000000000000001.json | 11 +++++++ .../_delta_log/00000000000000000002.json | 11 +++++++ .../_delta_log/00000000000000000003.json | 11 +++++++ .../_delta_log/00000000000000000004.json | 11 +++++++ .../_delta_log/00000000000000000005.json | 11 +++++++ .../_delta_log/00000000000000000006.json | 11 +++++++ .../_delta_log/00000000000000000007.json | 11 +++++++ .../_delta_log/00000000000000000008.json | 11 +++++++ .../_delta_log/00000000000000000009.json | 11 +++++++ .../00000000000000000010.checkpoint.parquet | Bin 0 -> 21615 bytes .../_delta_log/00000000000000000010.json | 11 +++++++ .../_delta_log/00000000000000000011.json | 11 +++++++ .../_delta_log/00000000000000000012.json | 11 +++++++ .../_delta_log/00000000000000000013.json | 11 +++++++ .../00000000000000009999.checkpoint.parquet | Bin 0 -> 21615 bytes .../_delta_log/00000000000000009999.json | 11 +++++++ .../table_g/_delta_log/_last_checkpoint | 1 + .../_delta_log/00000000000000000000.json | 3 ++ .../_delta_log/00000000000000000001.json | 11 +++++++ .../_delta_log/00000000000000000002.json | 11 +++++++ .../_delta_log/00000000000000000003.json | 11 +++++++ .../_delta_log/00000000000000000004.json | 11 +++++++ .../_delta_log/00000000000000000005.json | 11 +++++++ .../_delta_log/00000000000000000006.json | 11 +++++++ .../_delta_log/00000000000000000007.json | 11 +++++++ .../_delta_log/00000000000000000008.json | 11 +++++++ .../_delta_log/00000000000000000009.json | 11 +++++++ .../00000000000000000010.checkpoint.parquet | Bin 0 -> 21615 bytes .../_delta_log/00000000000000000010.json | 11 +++++++ .../_delta_log/00000000000000000011.json | 11 +++++++ .../_delta_log/00000000000000000012.json | 11 +++++++ .../_delta_log/00000000000000000013.json | 11 +++++++ .../00000000000000009999.checkpoint.parquet | Bin 0 -> 10569 bytes .../_delta_log/00000000000000009999.json | 3 ++ .../table_h/_delta_log/_last_checkpoint | 1 + .../_delta_log/00000000000000000000.json | 3 ++ .../_delta_log/00000000000000000001.json | 11 +++++++ .../_delta_log/00000000000000000002.json | 11 +++++++ .../_delta_log/00000000000000000003.json | 11 +++++++ .../_delta_log/00000000000000000004.json | 11 +++++++ .../_delta_log/00000000000000000005.json | 11 +++++++ .../_delta_log/00000000000000000006.json | 11 +++++++ .../_delta_log/00000000000000000007.json | 11 +++++++ .../_delta_log/00000000000000000008.json | 11 +++++++ .../_delta_log/00000000000000000009.json | 11 +++++++ .../00000000000000000010.checkpoint.parquet | Bin 0 -> 21615 bytes .../_delta_log/00000000000000000010.json | 11 +++++++ .../_delta_log/00000000000000000011.json | 11 +++++++ .../_delta_log/00000000000000000012.json | 11 +++++++ .../_delta_log/00000000000000000013.json | 11 +++++++ .../00000000000000009999.checkpoint.parquet | Bin 0 -> 11644 bytes .../_delta_log/00000000000000009999.json | 4 +++ .../table_i/_delta_log/_last_checkpoint | 1 + crates/core/tests/exotic_tables.rs | 30 ++++++++++++++++++ python/src/schema.rs | 13 +++----- 131 files changed, 1030 insertions(+), 26 deletions(-) create mode 100644 crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000000.json create mode 100644 crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000001.json create mode 100644 crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000002.json create mode 100644 crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000003.json create mode 100644 crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000004.json create mode 100644 crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000005.json create mode 100644 crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000006.json create mode 100644 crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000007.json create mode 100644 crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000008.json create mode 100644 crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000009.json create mode 100644 crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000010.checkpoint.parquet create mode 100644 crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000010.json create mode 100644 crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000011.json create mode 100644 crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000012.json create mode 100644 crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000013.json create mode 100644 crates/core/tests/data_err_logs/table_a/_delta_log/_last_checkpoint create mode 100644 crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000000.json create mode 100644 crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000001.json create mode 100644 crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000002.json create mode 100644 crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000003.json create mode 100644 crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000004.json create mode 100644 crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000005.json create mode 100644 crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000006.json create mode 100644 crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000008.json create mode 100644 crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000009.json create mode 100644 crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000010.checkpoint.parquet create mode 100644 crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000010.json create mode 100644 crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000011.json create mode 100644 crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000012.json create mode 100644 crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000013.json create mode 100644 crates/core/tests/data_err_logs/table_b/_delta_log/_last_checkpoint create mode 100644 crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000000.json create mode 100644 crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000001.json create mode 100644 crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000002.json create mode 100644 crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000003.json create mode 100644 crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000004.json create mode 100644 crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000005.json create mode 100644 crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000006.json create mode 100644 crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000007.json create mode 100644 crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000008.json create mode 100644 crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000009.json create mode 100644 crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000010.checkpoint.parquet create mode 100644 crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000010.json create mode 100644 crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000011.json create mode 100644 crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000013.json create mode 100644 crates/core/tests/data_err_logs/table_c/_delta_log/_last_checkpoint create mode 100644 crates/core/tests/data_err_logs/table_d/_delta_log/00000000000000000000.checkpoint.parquet create mode 100644 crates/core/tests/data_err_logs/table_d/_delta_log/00000000000000000000.json create mode 100644 crates/core/tests/data_err_logs/table_d/_delta_log/_last_checkpoint create mode 100644 crates/core/tests/data_err_logs/table_e/_delta_log/00000000000000000000.checkpoint.parquet create mode 100644 crates/core/tests/data_err_logs/table_e/_delta_log/00000000000000000000.json create mode 100644 crates/core/tests/data_err_logs/table_e/_delta_log/_last_checkpoint create mode 100644 crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000000.json create mode 100644 crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000001.json create mode 100644 crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000002.json create mode 100644 crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000003.json create mode 100644 crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000004.json create mode 100644 crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000005.json create mode 100644 crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000006.json create mode 100644 crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000007.json create mode 100644 crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000008.json create mode 100644 crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000009.json create mode 100644 crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000010.checkpoint.parquet create mode 100644 crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000010.json create mode 100644 crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000011.json create mode 100644 crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000012.json create mode 100644 crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000013.json create mode 100644 crates/core/tests/data_err_logs/table_f/_delta_log/_last_checkpoint create mode 100644 crates/core/tests/data_err_logs/table_g/_delta_log/00000000000000000000.json create mode 100644 crates/core/tests/data_err_logs/table_g/_delta_log/00000000000000000001.json create mode 100644 crates/core/tests/data_err_logs/table_g/_delta_log/00000000000000000002.json create mode 100644 crates/core/tests/data_err_logs/table_g/_delta_log/00000000000000000003.json create mode 100644 crates/core/tests/data_err_logs/table_g/_delta_log/00000000000000000004.json create mode 100644 crates/core/tests/data_err_logs/table_g/_delta_log/00000000000000000005.json create mode 100644 crates/core/tests/data_err_logs/table_g/_delta_log/00000000000000000006.json create mode 100644 crates/core/tests/data_err_logs/table_g/_delta_log/00000000000000000007.json create mode 100644 crates/core/tests/data_err_logs/table_g/_delta_log/00000000000000000008.json create mode 100644 crates/core/tests/data_err_logs/table_g/_delta_log/00000000000000000009.json create mode 100644 crates/core/tests/data_err_logs/table_g/_delta_log/00000000000000000010.checkpoint.parquet create mode 100644 crates/core/tests/data_err_logs/table_g/_delta_log/00000000000000000010.json create mode 100644 crates/core/tests/data_err_logs/table_g/_delta_log/00000000000000000011.json create mode 100644 crates/core/tests/data_err_logs/table_g/_delta_log/00000000000000000012.json create mode 100644 crates/core/tests/data_err_logs/table_g/_delta_log/00000000000000000013.json create mode 100644 crates/core/tests/data_err_logs/table_g/_delta_log/00000000000000009999.checkpoint.parquet create mode 100644 crates/core/tests/data_err_logs/table_g/_delta_log/00000000000000009999.json create mode 100644 crates/core/tests/data_err_logs/table_g/_delta_log/_last_checkpoint create mode 100644 crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000000.json create mode 100644 crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000001.json create mode 100644 crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000002.json create mode 100644 crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000003.json create mode 100644 crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000004.json create mode 100644 crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000005.json create mode 100644 crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000006.json create mode 100644 crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000007.json create mode 100644 crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000008.json create mode 100644 crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000009.json create mode 100644 crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000010.checkpoint.parquet create mode 100644 crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000010.json create mode 100644 crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000011.json create mode 100644 crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000012.json create mode 100644 crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000013.json create mode 100644 crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000009999.checkpoint.parquet create mode 100644 crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000009999.json create mode 100644 crates/core/tests/data_err_logs/table_h/_delta_log/_last_checkpoint create mode 100644 crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000000.json create mode 100644 crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000001.json create mode 100644 crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000002.json create mode 100644 crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000003.json create mode 100644 crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000004.json create mode 100644 crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000005.json create mode 100644 crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000006.json create mode 100644 crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000007.json create mode 100644 crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000008.json create mode 100644 crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000009.json create mode 100644 crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000010.checkpoint.parquet create mode 100644 crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000010.json create mode 100644 crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000011.json create mode 100644 crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000012.json create mode 100644 crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000013.json create mode 100644 crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000009999.checkpoint.parquet create mode 100644 crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000009999.json create mode 100644 crates/core/tests/data_err_logs/table_i/_delta_log/_last_checkpoint create mode 100644 crates/core/tests/exotic_tables.rs diff --git a/Cargo.toml b/Cargo.toml index 0fe711794c..ba562f3d07 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,8 +26,12 @@ debug = true debug = "line-tables-only" [workspace.dependencies] -delta_kernel = { version = "0.8.0", features = ["arrow_54"] } -#delta_kernel = { path = "../delta-kernel-rs/kernel", features = ["sync-engine"] } +# delta_kernel = { version = "0.8.0", features = ["arrow_54", "default-engine-rustls"] } +delta_kernel = { path = "../delta-kernel-rs/kernel", features = [ + "arrow_54", + "developer-visibility", + "default-engine-rustls", +] } # arrow arrow = { version = "54" } @@ -59,7 +63,7 @@ datafusion-sql = "46" # serde serde = { version = "1.0.194", features = ["derive"] } serde_json = "1" -strum = { version = "0.26"} +strum = { version = "0.26" } # "stdlib" diff --git a/crates/core/src/kernel/models/actions.rs b/crates/core/src/kernel/models/actions.rs index a157a1df97..643edfe4d5 100644 --- a/crates/core/src/kernel/models/actions.rs +++ b/crates/core/src/kernel/models/actions.rs @@ -3,6 +3,7 @@ use std::fmt::{self, Display}; use std::str::FromStr; use delta_kernel::schema::{DataType, StructField}; +use delta_kernel::table_features::{ReaderFeatures, WriterFeatures}; use maplit::hashset; use serde::{Deserialize, Serialize}; use tracing::warn; @@ -12,7 +13,6 @@ use super::schema::StructType; use super::StructTypeExt; use crate::kernel::{error::Error, DeltaResult}; use crate::TableProperty; -use delta_kernel::table_features::{ReaderFeatures, WriterFeatures}; /// Defines a file format used in table #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] @@ -1211,6 +1211,34 @@ mod tests { println!("{types:?}"); } + #[test] + fn test_deserialize_protocol() { + // protocol json data + let raw = serde_json::json!( + { + "minReaderVersion": 3, + "minWriterVersion": 7, + "readerFeatures": ["catalogOwned"], + "writerFeatures": ["catalogOwned", "invariants", "appendOnly"] + } + ); + let protocol: Protocol = serde_json::from_value(raw).unwrap(); + assert_eq!(protocol.min_reader_version, 3); + assert_eq!(protocol.min_writer_version, 7); + assert_eq!( + protocol.reader_features, + Some(hashset! {ReaderFeatures::Unknown("catalogOwned".to_owned())}) + ); + assert_eq!( + protocol.writer_features, + Some(hashset! { + WriterFeatures::Unknown("catalogOwned".to_owned()), + WriterFeatures::Invariants, + WriterFeatures::AppendOnly + }) + ); + } + // #[test] // fn test_deletion_vector_read() { // let store = Arc::new(LocalFileSystem::new()); diff --git a/crates/core/src/kernel/snapshot/log_segment.rs b/crates/core/src/kernel/snapshot/log_segment.rs index a6b742fde1..74c43b4837 100644 --- a/crates/core/src/kernel/snapshot/log_segment.rs +++ b/crates/core/src/kernel/snapshot/log_segment.rs @@ -188,10 +188,30 @@ impl LogSegment { segment.version = segment .file_version() .unwrap_or(end_version.unwrap_or(start_version)); + + segment.validate()?; + Ok(segment) } pub fn validate(&self) -> DeltaResult<()> { + let is_contiguous = self + .commit_files + .iter() + .collect_vec() + .windows(2) + .all(|cfs| { + cfs[0].location.commit_version().unwrap() - 1 + == cfs[1].location.commit_version().unwrap() + }); + if !is_contiguous { + println!("commit files: {:?}", self.commit_files); + println!("checkpoint files: {:?}", self.checkpoint_files); + return Err(DeltaTableError::Generic( + "non-contiguous log segment".into(), + )); + } + let checkpoint_version = self .checkpoint_files .iter() diff --git a/crates/core/src/kernel/snapshot/mod.rs b/crates/core/src/kernel/snapshot/mod.rs index 51bafb2c99..c15ec7c0a2 100644 --- a/crates/core/src/kernel/snapshot/mod.rs +++ b/crates/core/src/kernel/snapshot/mod.rs @@ -39,6 +39,7 @@ use crate::kernel::parse::read_cdf_adds; use crate::kernel::{ActionType, StructType}; use crate::logstore::LogStore; use crate::operations::transaction::CommitData; +use crate::operations::transaction::PROTOCOL; use crate::table::config::TableConfig; use crate::{DeltaResult, DeltaTableConfig, DeltaTableError}; @@ -81,6 +82,10 @@ impl Snapshot { }; let (metadata, protocol) = (metadata.unwrap(), protocol.unwrap()); let schema = serde_json::from_str(&metadata.schema_string)?; + + println!("{:?}", protocol); + PROTOCOL.can_read_from_protocol(&protocol)?; + Ok(Self { log_segment, config, diff --git a/crates/core/src/operations/create.rs b/crates/core/src/operations/create.rs index 2ad04a93cd..090b116637 100644 --- a/crates/core/src/operations/create.rs +++ b/crates/core/src/operations/create.rs @@ -144,12 +144,7 @@ impl CreateBuilder { if let Value::Number(n) = v { n.as_i64().map_or_else( || MetadataValue::String(v.to_string()), - |i| { - i32::try_from(i) - .ok() - .map(MetadataValue::Number) - .unwrap_or_else(|| MetadataValue::String(v.to_string())) - }, + |i| MetadataValue::Number(i), ) } else { MetadataValue::String(v.to_string()) diff --git a/crates/core/src/operations/transaction/protocol.rs b/crates/core/src/operations/transaction/protocol.rs index 8fcf013c54..85c6deaea1 100644 --- a/crates/core/src/operations/transaction/protocol.rs +++ b/crates/core/src/operations/transaction/protocol.rs @@ -2,7 +2,7 @@ use std::collections::HashSet; use std::sync::LazyLock; use super::{TableReference, TransactionError}; -use crate::kernel::{contains_timestampntz, Action, EagerSnapshot, Schema}; +use crate::kernel::{contains_timestampntz, Action, EagerSnapshot, Protocol, Schema}; use crate::protocol::DeltaOperation; use crate::table::state::DeltaTableState; use delta_kernel::table_features::{ReaderFeatures, WriterFeatures}; @@ -113,12 +113,17 @@ impl ProtocolChecker { /// Check if delta-rs can read form the given delta table. pub fn can_read_from(&self, snapshot: &dyn TableReference) -> Result<(), TransactionError> { - let required_features: Option<&HashSet> = - match snapshot.protocol().min_reader_version { - 0 | 1 => None, - 2 => Some(&READER_V2), - _ => snapshot.protocol().reader_features.as_ref(), - }; + self.can_read_from_protocol(snapshot.protocol()) + } + + pub fn can_read_from_protocol(&self, protocol: &Protocol) -> Result<(), TransactionError> { + let required_features: Option<&HashSet> = match protocol.min_reader_version + { + 0 | 1 => None, + 2 => Some(&READER_V2), + _ => protocol.reader_features.as_ref(), + }; + println!("required_features: {:?}", required_features); if let Some(features) = required_features { let mut diff = features.difference(&self.reader_features).peekable(); if diff.peek().is_some() { diff --git a/crates/core/src/storage/mod.rs b/crates/core/src/storage/mod.rs index 699f9ea11f..e8245fcbcc 100644 --- a/crates/core/src/storage/mod.rs +++ b/crates/core/src/storage/mod.rs @@ -649,7 +649,11 @@ mod tests { #[cfg(feature = "cloud")] #[test] + #[cfg(feature = "cloud")] fn test_retry_config_from_options() { + use maplit::hashmap; + use std::time::Duration; + struct TestFactory {} impl RetryConfigParse for TestFactory {} diff --git a/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000000.json b/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000000.json new file mode 100644 index 0000000000..0316f09771 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1742317262289,"operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","clusterBy":"[]","description":null,"isManaged":"false","properties":"{}"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"c0983a7a-aeca-4ba8-a509-d5dbc71a10de"}} +{"metaData":{"id":"5f54ef5f-e511-4114-b6e5-f6c206c068b6","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1742317261939}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":1}} diff --git a/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000001.json b/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000001.json new file mode 100644 index 0000000000..23002c6f58 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000001.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317316973,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4819"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"a1c49e36-8fcd-46e6-8ede-5b8560f7ec3b"}} +{"add":{"path":"part-00001-665397f5-1435-4478-a598-ca226c99ffcf-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316825,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":0},\"maxValues\":{\"id\":0},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-fd50becf-074e-4a1f-985b-01529e9f7b03-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316820,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":1},\"maxValues\":{\"id\":1},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-4fc1f70f-9daa-46e6-83b5-ea8144d4a96d-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316822,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":2},\"maxValues\":{\"id\":2},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-efb0808c-3b7f-4a4d-bc36-daa91c074b5b-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316825,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":3},\"maxValues\":{\"id\":3},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-c8664e02-01fe-4c2d-8eba-ae84012d7aad-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316825,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":4},\"maxValues\":{\"id\":4},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-7446ef47-3110-4c3f-a2d0-0c71bafc893a-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316820,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":5},\"maxValues\":{\"id\":5},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-ee372cff-6aae-4979-970b-88cc154a31bd-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316822,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":6},\"maxValues\":{\"id\":6},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-9ea59fdf-fc26-4650-a282-9c2cc1906c7c-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316823,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":7},\"maxValues\":{\"id\":7},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-d50ebfbf-e534-4bc8-b63d-437f6029da6e-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316823,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":8},\"maxValues\":{\"id\":8},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-96e67376-3d15-4895-bd5d-5e0a325bcb83-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316823,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":9},\"maxValues\":{\"id\":9},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000002.json b/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000002.json new file mode 100644 index 0000000000..86e3c7470c --- /dev/null +++ b/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000002.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317326453,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":1,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"ac59851b-981e-4e82-96ea-36a543cfe254"}} +{"add":{"path":"part-00001-6491d41d-d498-4a89-a291-92d964035606-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326374,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":10},\"maxValues\":{\"id\":10},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-d359921a-3cb1-454d-8aa0-ac5c830fcdc5-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326373,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":11},\"maxValues\":{\"id\":11},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-b2aae64d-1fab-4106-bc87-2454e945dada-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326373,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":12},\"maxValues\":{\"id\":12},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-cca989ea-d56e-4e1e-a4ba-538ef7801997-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326373,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":13},\"maxValues\":{\"id\":13},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-14706643-f3f0-4ba9-8282-7d55bb4ecacb-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317326374,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":14},\"maxValues\":{\"id\":14},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-25801ed5-1cf2-43fa-bbdb-8898fc102e64-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326373,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":15},\"maxValues\":{\"id\":15},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-03fbeb6f-b8a5-448e-afa7-0f49fca61866-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326374,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":16},\"maxValues\":{\"id\":16},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-08361c76-870e-4ddf-9153-f67852849ec3-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326374,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":17},\"maxValues\":{\"id\":17},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-368e738e-0673-4e76-a1ff-5ba9c755396e-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326373,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":18},\"maxValues\":{\"id\":18},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-2c9a3837-e2c5-42bd-b888-f3205f4b894c-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326374,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":19},\"maxValues\":{\"id\":19},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000003.json b/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000003.json new file mode 100644 index 0000000000..14665bcd27 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000003.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317330682,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":2,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"5e6fdfee-2a82-40d1-bc83-47b3f0f3f21e"}} +{"add":{"path":"part-00001-b7be8377-b715-4234-b316-201fd2c9c142-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330578,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":20},\"maxValues\":{\"id\":20},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-719b1086-b19c-45d1-8c4c-c11db02e2e0b-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330581,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":21},\"maxValues\":{\"id\":21},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-eb0fd03c-3e47-42ed-9897-e79dd1567fb1-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330577,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":22},\"maxValues\":{\"id\":22},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-48b99dee-6d3e-4cfb-b651-4769de7f5b24-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330577,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":23},\"maxValues\":{\"id\":23},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-b873a231-8352-4bac-b6f1-b53ee738d212-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330582,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":24},\"maxValues\":{\"id\":24},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-74ed051c-b116-4947-b62c-2086bcd5bb90-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330577,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":25},\"maxValues\":{\"id\":25},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-680d9e49-afce-4a7d-bca8-b03438c2fd74-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330577,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":26},\"maxValues\":{\"id\":26},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-81c10052-aacc-4ecf-b9cf-64f81b3bd435-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330577,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":27},\"maxValues\":{\"id\":27},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-cf9cac69-932c-43bf-8e4b-fd059d519c0f-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317330577,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":28},\"maxValues\":{\"id\":28},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-43e3252c-8ac7-4c7a-bcb4-15aaf7ae95b9-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330581,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":29},\"maxValues\":{\"id\":29},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000004.json b/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000004.json new file mode 100644 index 0000000000..ed86e283d5 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000004.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317333588,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":3,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"57611e9e-5312-4502-a3ad-c0c78799773e"}} +{"add":{"path":"part-00001-5c92b4bb-af84-4066-8aeb-1e493b7147df-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333524,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":30},\"maxValues\":{\"id\":30},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-b050a084-ab25-420f-bb7b-50eb95d25e4e-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333551,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":31},\"maxValues\":{\"id\":31},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-1880b504-fcf5-4f43-92d4-c43e8dd9d7d9-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333550,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":32},\"maxValues\":{\"id\":32},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-4af9173b-8472-41cd-8772-e2bdb084c5d5-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317333524,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":33},\"maxValues\":{\"id\":33},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-89cec234-f844-4802-a786-5d9133bbe489-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333523,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":34},\"maxValues\":{\"id\":34},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-fa11b282-ec0d-4513-9baf-2b84c5f94a12-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333550,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":35},\"maxValues\":{\"id\":35},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-895723cb-0dba-4019-a2a9-e6db9a937c91-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333550,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":36},\"maxValues\":{\"id\":36},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-dca57e7a-f859-4b39-bc43-03e1061f1b4e-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333551,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":37},\"maxValues\":{\"id\":37},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-96756753-7714-4c07-a238-d5b57f42a8ce-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333523,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":38},\"maxValues\":{\"id\":38},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-9a7b861f-5d9a-41c4-b4ec-7f0d1391acfe-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333551,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":39},\"maxValues\":{\"id\":39},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000005.json b/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000005.json new file mode 100644 index 0000000000..f77fd655fe --- /dev/null +++ b/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000005.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317336099,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":4,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4819"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"203edd74-d2b2-46fe-935a-6222cfb888d4"}} +{"add":{"path":"part-00001-a24fe71c-ba35-47bb-8f3e-636d5991d5ae-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336058,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":40},\"maxValues\":{\"id\":40},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-a56ad50f-bc64-44cb-bb55-e2d177947b3d-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336079,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":41},\"maxValues\":{\"id\":41},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-95c53cf1-b472-4c34-b728-1dd7cbed8b2f-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336074,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":42},\"maxValues\":{\"id\":42},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-7e442ced-e810-44d9-9d28-3027e652a0ec-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336080,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":43},\"maxValues\":{\"id\":43},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-68248457-2fa3-407e-9de3-759b1e052b99-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336075,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":44},\"maxValues\":{\"id\":44},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-89a48ead-5bf3-4d16-aada-97c11386fcaf-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336076,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":45},\"maxValues\":{\"id\":45},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-42b618de-c46c-4888-9b48-b99493ec2983-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336070,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":46},\"maxValues\":{\"id\":46},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-4dc49305-f4f8-4ec9-9a40-8f4b3bd81324-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336055,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":47},\"maxValues\":{\"id\":47},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-d7bb17a9-223e-474b-9d78-2c745cc35a4b-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336054,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":48},\"maxValues\":{\"id\":48},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-68a79bb6-a31e-49bf-848f-2d64ceb834c0-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336079,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":49},\"maxValues\":{\"id\":49},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000006.json b/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000006.json new file mode 100644 index 0000000000..28116ba9f2 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000006.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317338700,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":5,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"9ee4b86b-1bbe-4d6e-adbd-8dd4961989fb"}} +{"add":{"path":"part-00001-a8fc5b00-29e4-4a99-961d-b0cbcc23d165-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338678,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":50},\"maxValues\":{\"id\":50},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-5a3079f8-abbc-4b5f-a1e3-340830e59222-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317338670,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":51},\"maxValues\":{\"id\":51},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-567d7b20-b6ce-4e96-b500-caa34c80f8a7-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338676,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":52},\"maxValues\":{\"id\":52},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-b794df4b-174f-468a-9de7-2aa865ba7014-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338676,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":53},\"maxValues\":{\"id\":53},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-a15406cf-c141-4f7b-b302-e4b5a145cad5-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338675,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":54},\"maxValues\":{\"id\":54},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-8a52349c-d93b-4c59-b493-13486bb5e284-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338680,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":55},\"maxValues\":{\"id\":55},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-1734b4eb-4414-4b3a-8e99-1bd099c9e6b5-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338670,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":56},\"maxValues\":{\"id\":56},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-02781f2a-6c34-42ca-80a4-e830b2eeb963-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338676,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":57},\"maxValues\":{\"id\":57},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-63ec1c21-c31c-43d4-b5c9-9c206aeeb280-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338670,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":58},\"maxValues\":{\"id\":58},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-b732e8e4-7d1b-470d-89a5-86a3f8d8bdc2-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338680,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":59},\"maxValues\":{\"id\":59},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000007.json b/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000007.json new file mode 100644 index 0000000000..956c8b508c --- /dev/null +++ b/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000007.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317339658,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":6,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4819"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"63c885b0-74eb-4075-a02a-a43b8202b3f8"}} +{"add":{"path":"part-00001-f697bc51-b77f-4234-938a-5f85478cedec-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339621,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":60},\"maxValues\":{\"id\":60},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-839f210e-cf84-4c5c-b185-fd2fe2b5ee6f-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339491,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":61},\"maxValues\":{\"id\":61},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-9ebe1c22-87a1-4f37-a695-77658c3e70a8-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339491,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":62},\"maxValues\":{\"id\":62},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-01b2687c-45e4-484c-b1d6-80e06b5b5d11-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339490,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":63},\"maxValues\":{\"id\":63},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-05662cc4-6a79-4204-aec1-2311a44d8c74-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339511,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":64},\"maxValues\":{\"id\":64},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-67f1b806-ef5f-4f8a-890b-b3b5ad1d234c-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339490,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":65},\"maxValues\":{\"id\":65},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-37318455-4128-4e1e-9ab7-5c587ac9fde0-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339621,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":66},\"maxValues\":{\"id\":66},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-c5d68934-1f5a-40c4-b5be-1233eb15378a-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339511,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":67},\"maxValues\":{\"id\":67},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-41e40903-13b6-4465-aa3c-bd8cb5e52b18-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339621,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":68},\"maxValues\":{\"id\":68},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-110c626e-ea13-4204-8cae-a3183d89a4b7-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339621,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":69},\"maxValues\":{\"id\":69},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000008.json b/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000008.json new file mode 100644 index 0000000000..ea216c1556 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000008.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317340794,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":7,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"b5902ae9-2aab-46f9-82f9-d68ae45940d7"}} +{"add":{"path":"part-00001-3b62f1d4-2a3e-4611-a55b-e9d2ace11b3c-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340752,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":70},\"maxValues\":{\"id\":70},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-9c066923-23f4-45f6-b2af-5a4ecbef1707-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340776,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":71},\"maxValues\":{\"id\":71},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-cfb48b6e-0fc7-4d6b-8ab7-c52f29f71b94-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340752,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":72},\"maxValues\":{\"id\":72},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-b21a45a5-9a53-4dfa-8327-8a82b6b283e9-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317340766,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":73},\"maxValues\":{\"id\":73},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-afa450c4-f649-4c88-817a-6d0bdfc4da6f-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340765,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":74},\"maxValues\":{\"id\":74},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-dd96cd25-394d-4873-84e7-f2f6b0eb5a67-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340776,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":75},\"maxValues\":{\"id\":75},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-3a98b7ed-8665-4bc5-8704-6745f7084cd0-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340776,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":76},\"maxValues\":{\"id\":76},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-8dfd3dc5-cf31-42fc-8c55-2ac70ce9e18d-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340782,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":77},\"maxValues\":{\"id\":77},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-97e7d287-aee3-445d-a90e-f3b2ef4bd7cd-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340765,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":78},\"maxValues\":{\"id\":78},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-db2340aa-28ff-4826-b39e-07ba516551e9-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340779,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":79},\"maxValues\":{\"id\":79},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000009.json b/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000009.json new file mode 100644 index 0000000000..1f9d5fb05b --- /dev/null +++ b/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000009.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317341714,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":8,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4819"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"5379512e-d4d3-42b5-817d-70ecf05f2385"}} +{"add":{"path":"part-00001-4e7175fd-6ffb-4b6a-946c-43aa7c439104-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341702,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":80},\"maxValues\":{\"id\":80},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-f3cc4f07-93ec-4a47-add1-b16c1149c3d9-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341667,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":81},\"maxValues\":{\"id\":81},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-07581f2d-ee98-4464-a28b-f738e88749e4-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341702,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":82},\"maxValues\":{\"id\":82},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-c9d7468d-fc14-445c-8dbd-65d616f8eb05-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341666,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":83},\"maxValues\":{\"id\":83},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-56eb2260-fb49-4138-a5c8-f0ae0949f4e2-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341670,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":84},\"maxValues\":{\"id\":84},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-5d15eeca-0fca-4986-a18e-4d86bf5ba2f6-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341670,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":85},\"maxValues\":{\"id\":85},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-3e13d733-f55d-42ac-be4f-f4400e999c29-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341702,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":86},\"maxValues\":{\"id\":86},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-22c23f1e-26d2-488e-8e07-2de6ae5fded5-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341667,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":87},\"maxValues\":{\"id\":87},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-d0debf09-5f82-4c61-8636-27e51fba37e5-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341674,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":88},\"maxValues\":{\"id\":88},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-7403e0c4-bb07-4a1b-9fca-a01523713f85-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341702,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":89},\"maxValues\":{\"id\":89},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000010.checkpoint.parquet b/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000010.checkpoint.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9df253f2f762891c53763e62e57576217a8b289a GIT binary patch literal 21615 zcmeHve{@vWedn9eNJxOjGSQn1ss0cYt8$MXeZSv*J~}DCYRg2%@)&#F)J}Zgd-s8C z{i-xzpbN&@WK1Zf3Lz{>Xcs&FL5b`75Z6szH)~Rs^*WB%OX8GzecHfw-NyBYj6pooH)6&Hv<{p$ON=68rMrSQ2) zY!-w$`36A{UP=i+lL1qsVriLp{0A?WQk6tru+9`fOL!!eZa8>2pI(Xf7#%N4vMejI z6$9Zxb1&s0pGFh|C){f0z+svEMa zOTMYw5>`PZqF7WiJ=yVmSNEvtOWSq}`z?u3L)CQKl{_09fR-&ex~)lyrW4a~45kuY z+H!-iob2#zm%64c*_!G~y5{&2VY(yvo^05jq1dkMPrj476RTyGVi`Ud^L=1CCY5a6 zOx@B+spaaLt;mz0E$fo3II2l3pcxDc!crwi@l1)xOg0_EF+4@#Lv@Mykc3QJNpmb8 z64O1&aWzx&D76(=wsee^wsfQx9qv{PT{c}`f{d2m-)A|Jqsb5^MomvLRFGOQMbnBT;2vavab%Waik0r(%t4(oN}xL~NJ25WD0PebM25%%v7FCBw57 zO>-Qk6MHB0?dVihHONEp2uOn~mZ%Xhbum(Wgc^}mMKWX z)g04dmSkHnUftK>X*Qgg`BYOK%TPSKM4RrkB-8R0hsY+rF)szB$>E;-SHF;q0;@`MRGiG}D_2)n$qJSia*CMN=nf z(|-7Xi6Ce@4q!AyJ>BNMuM=NVJyUm?Ll7ysPfuZ)h)uo))3@Q82*wQDY7Q)d5=GZN zo!aK)+J^nGFdNeLR2VkQ3G;Nrlc09MW^BTAS=_G|`8Q9rwLZa=kn+(G1aWKGwItNNaWn3uYt z8=+p;U^v9Kvwt|Z9d<+u-ACWvayW$$uNp*F7;=c>`k<;oehxB=M2JnGEm*7>g!pzSMKj4MuN{_XT3UAQ zTzXpfeb_IxUCS^u+bngYIyA{f%G2Caf8LCQb+mgGG-M$oDjrnkFb@kyY#^YgTe4}w zhkVVKv-6r)K|Csf`*?=oz@3mz_FTy9P%WbPDzcM{h^VWGtwg2>`B(%j0kPB6CJ(28 zCmW`zx=3p#tT8*%vJ9aXQA$GagMsKCad#e1uSJkz)Ic_dK3&y>4H^(i>eX&&)pZpc z$-$a@yQvrUVpGc@@NFJ>X||;`qf3hx^hlO3dkWGNb^X%e)RZn+Ob1`6Fc=SwpadE) zT?vWBVy2-|nZaJ*tP*myNo=e`@eOd8+TCk7Oj1-$V-8%^B6J%v07(0arueQ!Bv++t zn;G#=J&1&;sw$Rj*_K5ncQs}^uFCt4P8^dRJ=B4i=b6x2_UqYh*HC@c_ASM+cegg= zb%p7&EoXPM-VGnsv0_9T4S4_}K?*liMb!QfyY6-)=Kdc2eiIT$`CfJSR{wLr??;LW8Tq z3>nf3Wds>U^(;zDZYpm{K2;QF`Clw!E($c=P+&$5j9hiHpSfzjZ|ijDWAmmE3|(7w zbjN|mA(bMPBY;q(X)F;HiUphUO4nw35bibGR}};dxABIIYh;JAI&1{%@KHD+L7K=X z%V2Q|Ok750q==*luER?l8|yNW36$(tnlrjq`fj66Bm^N#bz$xBx1(+C7F2A>5AEuN z$rw7qo`clQZAMXK*Hlf0h^A^72BHc| zHUxbSxk{y!nD8b`Rwi2;xV@+<>Hr@xRW-d+W*cD;-qFn+9z&8v1$A`mmiyD)usKim znB&WKc4+?5kM)^II3}teSf8Qz4%I9M8KXS0pnJqlOVX4F{-WR2d?a{JmSh)}LVXvO zIXRXYKwOhGq#Tq92#^RsI`UNZyH_J`$iAMPx~dJOg=xZKvcJ}{jM~JpVEgF7NTlZM z;Jgm_prsqWmEE^M@JiQYS7{QePn3eNI*ow=U0slF*KLGi%l144MYiuuem%P%86GvM z;^~SfdB{wts0c4ZPh zDU|vy?8H_zgaRK%WB5qTDvZmZh%yfI6-zE1Pj`SCGKNC3UuqHb?C&qgzBISjhJ#6t zZNs`yli8&7t@+)se+_ZQheT6)cFlrSNK#k?(!Gh)jY3TGO8;eU8;S$VK-!#-z`z!{ z&AO?dIwqqcM%Bfz?Lb0CuH)WrBM({{GPmYIeW=6Wnn*kblToc0j_Z`*oyt+FYbrw~ zLWD$_sash&hav*?qDfI^BCvWc5;FZt)>_c+BL_1B3hHxQEWp&@RGQ_RvWm>R^Lz`W zVo*)CY@eVYbzu4q>L02g*J{XYn#>GaRrl-`c0jDy9B^BNXmym*FjfR6)rE##SAz^4 z#MM01ui2KL?Vr2+cpr)}lzvd3ixSmzZQJ*=?d_B1oNnyI9CXQ&?__@{W)v-T12!O% zFHCc7xV-S!lf*^;ay)4^DWuWq72q%tR`U(Os z_6zW;?5%A&ycx+Abu7Y~!i$9tc(BYgPcI>oH;J9FF^|D^y+v-1i+zz}qR7=P^VG*P z$Q7Hj+emuKl05i;i@k!RA%vhruryR!2pv9ZHV4InL3!zRH{{?ss9j|gSt>!)(g{)> ziq>pyL8n6v#WW0sAzLH+Dv$tnw2+1CFM0<7@2J=VIp`X)jt5omypZXESWycYjx0;+ z1OGIDG7a(1gpWG1>$r$fs9sCQ<_Ox+IYo+k!9#+hFj17oD2PzRsVG30jUp~9e+03p z9ccsm!lQ@4DKZjv0$4A$jVh{U$Ay+18&wQ<4X^r8bD^k^N(LyUr9LC z*p})R8kaZs3Js}t0bo;2;x`+_CtrW?$Bn(B!?AZXESdPv`r$7ESbx5yIrHEDrlnbM zTbncOPqegUPJbI+cfEiv(kf>@e-K?wFX5BZN-}?UPfMGiwRQ_jTbqU7XdMtff}i)d zZWs2o_MtDIx$2pgWx`17m@tk(U&R0OTRVjf_@Zm7MHb#@J(T(1e-4Q6x9$)=g(=%w zW#Jn9KNs!$TE8LuE!u6Zd10WH2zy#L3Eyk&%>1`+x7;nf2|DWmcMhFRt;>Y>THA!} zXwPfS_ul(LxEAyI@FxWIjgdrI2Qz@#ZUs={Ue--WMd$_0n3*@Ty z8^5!v<%_59_~^dD|9WiUUHdxsKe+WD=J?CIuXFGC{EES)3zW_SZ3hm0>CwK&9{ZOs z?RfCyQ@{V-)c^RWbl$2x;dpIGtFm3qDfI{v-IBO8Vb%pD!~CKg|(^ei48 z9J!qiZD9EHdy5CX#n;(nD|+ds+po_FOGY*f4XqsAFj8FH+S4p=-Ma5Kq1jNkuD(rZ z%)N1&kTK0Qy!qyBLg8@KrD5!))tJK= z;pRP$Hx$+iDRIrM!j~UyD4a(>r}St;G|4VJo=OFLXiCh-T8!f0c9}THL~ICJ;X?GS zQ8*W(2zt3MV_9o%4fz2Rk7UX)Uu_IqW(7={h+3WunD;{gfN+_JQYeoj@wa*)C@kZs zz2bk(4HmlZp*e-Vxxq5g6s*FJK^r5oM)QiFMxaWe5OGclH#LO}CF=^;%4PEtEP(&y z5L^mMz%=i%KLOg+B?kfF@*E7L8t5Qx;9? zhbwN0L}$lKwE}K97iL*!&_y38an#4sx1Y^qw)VVvt5CS90jqyZ`0|g_h1G!M><&cB zH5c_>|KoI^WlUZJ=K17vslq#RLWw<|0{lbKqKv}9XyGIf7)N?9n)7(7a4sFsQ5L&QW|IwjMQ(g zcpS6%H;07V`!k>X(wr|Jf*JF^Ljqh%&{KN@P4gi?jl^*)A1#pVWb=wckr8NxvN>1~ zc-hPBLVv`mQuxZ8YVPc^V^+9v`7jUo? zz-BvWDN3H_PJZog=T$;|^o}xL4S<>hzM|_cpz;c4`@j6%v1CGZ1;bf@B?UaxH~K_M zd^shSo_%{kB{zdVD09Q5^xHvNE^Z!Ui?3TeQ5@gk78my{_6OMzPtm`Fj7^96S4hr1 zi(7x~;NVD+tz+Z(z_T-ThVUh3F=TC4#8G;YB9(95de_!yZ9n+V$vw&SWOecXE{X&xDjd*4l(&)gIDo=uw1-W&IxzgIj^7U2G2adpJR>7@C@i0DMUyGLVL zzL_+SkBQsU(TL;Y@rZ{fVjY~Hh3`~314-T=LqAdYe z$A@DNzpzRIvZhe2LU%i6%Qo?Th zxj#&a?YZx77CVKm)g5A{Vs1GdB(qmn&weGC{a!G87cOQhr&s2W1Y%dJ8si^dahPgLt*^7CA^N49a*?(Dx5QnfyJ_E%%=53o)ISZ@SaK@1>VpH-DjWknON zxi8n>^+pZklL6$90>~pb#eO*xWK}VgpdPuYn$cqc>Yn@gqAvh79V%lc)ToY1&@R-V zy%vfvKznh`CDAGiu>|tPHPsBB2_+do?z=e-X)`f6ucDGmAotx|4O!a575$?C^7LgO zD@wrx@^lU4fdF#P_7s=nlLIlEfdePiV*>HyKs8rK0*E6a#JMVnixr#+;JF&W7XrW& zA>i>_Vv7lQnxRHbu#VqS&D`IIA`7wh6sBV}1_f<6efMWmxieRH?J4lQaP#F4w=Q`y z6<$C`*VDL!&d&ny(x+v?jdlW98A#s667#t|52S*GG3h01j0*IMjK z_0+!!jNljm%IP-(s4e6}nQ^pZHE0h9Xs?85C6>g=2(2bma+ngUX6hRO%$X48g<6;y zHw_72FVq0O8~{$?*X@s|NP50CwuLVc1xIIoKKF zI!AtTeKo_+1;|f@$Y+3D4(u~CJT10!#4|OBCj!J*Lc}A3mqVknA(}To$$_&U4sj?y2T>?-v6X4`I(Hu-%bZt3y3U zIa`B*+b&qsD_7BgnwqU;=uI~EWYK*BC&;Ja> z^Zbs$9!}2$QMtoUkWSYieP$n5-VZ{gCq`msNG#~4@Khi^4G7Ubm2^73uR?$K(gJQW~62V}(ZH|s&J zjJX{9%^K|E0ru-5_HjIZtTD)GXBwf9K9kOEPj?+3>&^f6mELh&^mhD1gyyRJg=Dnp3CXq^W$u6l#(i#qsfKgL^kHiUYV#fx$gIQ42l& zY-J`(=KJ!8C#r#W{#)*q&xOF}>j17cCEiL%gD+ISH%(+=knGNp~PC389;^HxaaB*;VsW4xHE)4q!s)5(q1IzJl#AJ`oj z)6@6O06hIL@FKuZ-&YO1dw*a|A@I~D4jdlgWv8DJMrVVePw+C0M zwd`Et4PwQ$Ydr4*SXbp8?FYvRFLaan)z4clQu-y@sJH~a0+foaXo6CwjV3qE^+D7T zLE^jX`Y0sQy2xes2ZgJ>9Z!(z4dB7aW4+VskX?A67ZZpb_Y$6RWdXSCqVZjSOuNqL z_cMuFGd(Cs7^*u6{{NI+%8L4BEKMHNB&Jb!ME+$g4U67cd3hZHto+U;R?TceSnv-H zB7W+P$icr^D3|bcR|plaY+{b<1__Vf;`y^#arxm+JgIJ?(L0|QAg20rshI1qh{}gV zF}HOuHxA{|yQ3H>UN;>dr<>6oy*sMj8sPEK2i_G-d$?3f`R5|>q&%v4_!RTnBa4$sw(w5|YZ`qQo$C|=NqXgAZ3gMJRA>u}5ICrh=wT|&>{ z)ObqWzC5!#)eRH9eM|V5Wqo*AA>FlA$gf7g;m^dR;qlJs)TUEes%T4<&TePJ;E>np zIXZp=pd#a!FFN>Th77;EQN!;J!>`lxC{s-SJE8CkJ{)>2p5@_*_5b641bwL>4B%Id F{vT}G{2u@S literal 0 HcmV?d00001 diff --git a/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000010.json b/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000010.json new file mode 100644 index 0000000000..c1f8c8b095 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000010.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317342605,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":9,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"ba4b200d-e0ac-4715-ad4e-bed8ef1b20cd"}} +{"add":{"path":"part-00001-55fbea2e-0788-438a-a50c-65f809acc05b-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342581,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":90},\"maxValues\":{\"id\":90},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-0d891eaf-0e01-46a6-879e-49bbca90c215-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317342581,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":91},\"maxValues\":{\"id\":91},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-569d12e9-04d5-4fe2-9554-f288047f3386-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342592,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":92},\"maxValues\":{\"id\":92},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-1bc89ec9-8092-49e1-9b1f-123ae50e3d40-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342579,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":93},\"maxValues\":{\"id\":93},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-b4a223d6-1d87-49c9-84c9-a85eece61839-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342591,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":94},\"maxValues\":{\"id\":94},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-4c3b6be7-979c-4f42-8920-efa32b751d97-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342578,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":95},\"maxValues\":{\"id\":95},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-6c224359-8995-417a-8b24-b2e530327bc6-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342581,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":96},\"maxValues\":{\"id\":96},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-2a40eb21-34d2-48ca-aaa5-55db674f56de-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342590,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":97},\"maxValues\":{\"id\":97},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-ca8256ed-98cd-460d-8de2-9f6f7f388703-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342579,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":98},\"maxValues\":{\"id\":98},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-f131fc78-c201-4e8d-b194-222b2e79778d-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342578,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":99},\"maxValues\":{\"id\":99},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000011.json b/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000011.json new file mode 100644 index 0000000000..05863f6cd0 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000011.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317349152,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":10,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"1657fc30-879d-4b0b-972a-4e3a079fdd7a"}} +{"add":{"path":"part-00001-ceaadd5e-615b-455d-8f4b-052b9c94c7b6-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349136,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":100},\"maxValues\":{\"id\":100},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-b7dba1e7-b1e5-4f02-a223-69ec7353ab45-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349123,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":101},\"maxValues\":{\"id\":101},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-f099cf4d-d418-4852-8580-091908847a66-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317349122,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":102},\"maxValues\":{\"id\":102},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-418e8d25-7316-442a-9bc8-616ed01231eb-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349136,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":103},\"maxValues\":{\"id\":103},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-7538a9c2-1ccb-4150-b162-ef8d826fe30f-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349123,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":104},\"maxValues\":{\"id\":104},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-7619f42c-5bc4-4e77-b037-f36481c8b63c-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349123,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":105},\"maxValues\":{\"id\":105},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-eb49d85f-91cc-4293-9339-a664ee905b0f-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349134,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":106},\"maxValues\":{\"id\":106},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-81e22719-7705-4703-b2dd-c4e2982217a7-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349122,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":107},\"maxValues\":{\"id\":107},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-883858d1-9df6-4b55-a2be-5b8387134617-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349122,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":108},\"maxValues\":{\"id\":108},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-07d91938-ac89-48cc-a657-6067d2d9f67e-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349137,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":109},\"maxValues\":{\"id\":109},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000012.json b/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000012.json new file mode 100644 index 0000000000..4cc44fa8e8 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000012.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317349950,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":11,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"9a035bdd-f892-4449-9c39-401f31fcada6"}} +{"add":{"path":"part-00001-f3b19100-b5b3-4e72-8658-7a937e9ed515-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349924,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":110},\"maxValues\":{\"id\":110},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-54f2324a-e97f-4def-9101-9cc10599ba06-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349919,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":111},\"maxValues\":{\"id\":111},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-3f7ca40a-6497-4208-8a1a-11062456a5a9-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349936,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":112},\"maxValues\":{\"id\":112},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-a14852b2-c743-4a4a-b9c1-0c9472c51699-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349929,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":113},\"maxValues\":{\"id\":113},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-97d06207-5584-43df-afc2-2d1738d79193-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349943,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":114},\"maxValues\":{\"id\":114},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-0d431f03-6dbf-40e7-96fc-b1ebbbe9fc65-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349922,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":115},\"maxValues\":{\"id\":115},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-af0f0232-33c8-4315-821b-8bb1323b7a26-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317349936,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":116},\"maxValues\":{\"id\":116},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-4f744428-d088-497e-afd3-0b374e453e7c-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349936,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":117},\"maxValues\":{\"id\":117},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-694064b8-137e-45cd-b2ea-e28af172a2dc-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349918,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":118},\"maxValues\":{\"id\":118},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-56073753-4c1c-4a68-9b4a-13ef5d1a75fb-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349938,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":119},\"maxValues\":{\"id\":119},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000013.json b/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000013.json new file mode 100644 index 0000000000..b2d03d3ead --- /dev/null +++ b/crates/core/tests/data_err_logs/table_a/_delta_log/00000000000000000013.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317350712,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":12,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4819"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"c3cd0fa1-9c72-4344-8225-0b787e52d5e0"}} +{"add":{"path":"part-00001-7a0d95f8-e122-4cf6-b89c-389036a9b415-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350696,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":120},\"maxValues\":{\"id\":120},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-f1f035c1-bf0f-485c-950d-c81d0d2aa8a2-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350706,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":121},\"maxValues\":{\"id\":121},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-d7a51e45-70f3-4379-819b-341951abefff-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350691,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":122},\"maxValues\":{\"id\":122},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-4828722c-5799-4be1-ace1-14bd7f477dbf-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350691,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":123},\"maxValues\":{\"id\":123},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-f3c3c72e-5d71-4dc9-9e15-342f1d6cb6cc-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350701,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":124},\"maxValues\":{\"id\":124},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-e8d74ede-8876-4f55-8e9f-1bbde0d07a35-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350696,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":125},\"maxValues\":{\"id\":125},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-91052146-2292-45c3-b57e-1fd2dd6be6ed-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350692,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":126},\"maxValues\":{\"id\":126},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-9d203964-9f4c-4c84-ad77-9ba305bb6572-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350706,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":127},\"maxValues\":{\"id\":127},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-8362228b-acf6-4937-875b-26c013c342e1-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350690,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":128},\"maxValues\":{\"id\":128},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-6f57658e-1953-4b59-b504-27c9e8c5cc3b-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350677,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":129},\"maxValues\":{\"id\":129},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_a/_delta_log/_last_checkpoint b/crates/core/tests/data_err_logs/table_a/_delta_log/_last_checkpoint new file mode 100644 index 0000000000..604c2b31a2 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_a/_delta_log/_last_checkpoint @@ -0,0 +1 @@ +{"version":10,"size":102,"sizeInBytes":21615,"numOfAddFiles":100,"checkpointSchema":{"type":"struct","fields":[{"name":"txn","type":{"type":"struct","fields":[{"name":"appId","type":"string","nullable":true,"metadata":{}},{"name":"version","type":"long","nullable":true,"metadata":{}},{"name":"lastUpdated","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"add","type":{"type":"struct","fields":[{"name":"path","type":"string","nullable":true,"metadata":{}},{"name":"partitionValues","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"size","type":"long","nullable":true,"metadata":{}},{"name":"modificationTime","type":"long","nullable":true,"metadata":{}},{"name":"dataChange","type":"boolean","nullable":true,"metadata":{}},{"name":"tags","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"deletionVector","type":{"type":"struct","fields":[{"name":"storageType","type":"string","nullable":true,"metadata":{}},{"name":"pathOrInlineDv","type":"string","nullable":true,"metadata":{}},{"name":"offset","type":"integer","nullable":true,"metadata":{}},{"name":"sizeInBytes","type":"integer","nullable":true,"metadata":{}},{"name":"cardinality","type":"long","nullable":true,"metadata":{}},{"name":"maxRowIndex","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"baseRowId","type":"long","nullable":true,"metadata":{}},{"name":"defaultRowCommitVersion","type":"long","nullable":true,"metadata":{}},{"name":"clusteringProvider","type":"string","nullable":true,"metadata":{}},{"name":"stats","type":"string","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"remove","type":{"type":"struct","fields":[{"name":"path","type":"string","nullable":true,"metadata":{}},{"name":"deletionTimestamp","type":"long","nullable":true,"metadata":{}},{"name":"dataChange","type":"boolean","nullable":true,"metadata":{}},{"name":"extendedFileMetadata","type":"boolean","nullable":true,"metadata":{}},{"name":"partitionValues","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"size","type":"long","nullable":true,"metadata":{}},{"name":"deletionVector","type":{"type":"struct","fields":[{"name":"storageType","type":"string","nullable":true,"metadata":{}},{"name":"pathOrInlineDv","type":"string","nullable":true,"metadata":{}},{"name":"offset","type":"integer","nullable":true,"metadata":{}},{"name":"sizeInBytes","type":"integer","nullable":true,"metadata":{}},{"name":"cardinality","type":"long","nullable":true,"metadata":{}},{"name":"maxRowIndex","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"baseRowId","type":"long","nullable":true,"metadata":{}},{"name":"defaultRowCommitVersion","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"metaData","type":{"type":"struct","fields":[{"name":"id","type":"string","nullable":true,"metadata":{}},{"name":"name","type":"string","nullable":true,"metadata":{}},{"name":"description","type":"string","nullable":true,"metadata":{}},{"name":"format","type":{"type":"struct","fields":[{"name":"provider","type":"string","nullable":true,"metadata":{}},{"name":"options","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"schemaString","type":"string","nullable":true,"metadata":{}},{"name":"partitionColumns","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}},{"name":"configuration","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"createdTime","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"protocol","type":{"type":"struct","fields":[{"name":"minReaderVersion","type":"integer","nullable":true,"metadata":{}},{"name":"minWriterVersion","type":"integer","nullable":true,"metadata":{}},{"name":"readerFeatures","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}},{"name":"writerFeatures","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"domainMetadata","type":{"type":"struct","fields":[{"name":"domain","type":"string","nullable":true,"metadata":{}},{"name":"configuration","type":"string","nullable":true,"metadata":{}},{"name":"removed","type":"boolean","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"checksum":"94a578f92841fa7ba9cdee96b5905fdb"} diff --git a/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000000.json b/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000000.json new file mode 100644 index 0000000000..0316f09771 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1742317262289,"operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","clusterBy":"[]","description":null,"isManaged":"false","properties":"{}"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"c0983a7a-aeca-4ba8-a509-d5dbc71a10de"}} +{"metaData":{"id":"5f54ef5f-e511-4114-b6e5-f6c206c068b6","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1742317261939}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":1}} diff --git a/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000001.json b/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000001.json new file mode 100644 index 0000000000..23002c6f58 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000001.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317316973,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4819"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"a1c49e36-8fcd-46e6-8ede-5b8560f7ec3b"}} +{"add":{"path":"part-00001-665397f5-1435-4478-a598-ca226c99ffcf-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316825,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":0},\"maxValues\":{\"id\":0},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-fd50becf-074e-4a1f-985b-01529e9f7b03-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316820,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":1},\"maxValues\":{\"id\":1},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-4fc1f70f-9daa-46e6-83b5-ea8144d4a96d-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316822,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":2},\"maxValues\":{\"id\":2},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-efb0808c-3b7f-4a4d-bc36-daa91c074b5b-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316825,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":3},\"maxValues\":{\"id\":3},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-c8664e02-01fe-4c2d-8eba-ae84012d7aad-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316825,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":4},\"maxValues\":{\"id\":4},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-7446ef47-3110-4c3f-a2d0-0c71bafc893a-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316820,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":5},\"maxValues\":{\"id\":5},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-ee372cff-6aae-4979-970b-88cc154a31bd-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316822,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":6},\"maxValues\":{\"id\":6},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-9ea59fdf-fc26-4650-a282-9c2cc1906c7c-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316823,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":7},\"maxValues\":{\"id\":7},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-d50ebfbf-e534-4bc8-b63d-437f6029da6e-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316823,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":8},\"maxValues\":{\"id\":8},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-96e67376-3d15-4895-bd5d-5e0a325bcb83-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316823,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":9},\"maxValues\":{\"id\":9},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000002.json b/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000002.json new file mode 100644 index 0000000000..86e3c7470c --- /dev/null +++ b/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000002.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317326453,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":1,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"ac59851b-981e-4e82-96ea-36a543cfe254"}} +{"add":{"path":"part-00001-6491d41d-d498-4a89-a291-92d964035606-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326374,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":10},\"maxValues\":{\"id\":10},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-d359921a-3cb1-454d-8aa0-ac5c830fcdc5-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326373,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":11},\"maxValues\":{\"id\":11},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-b2aae64d-1fab-4106-bc87-2454e945dada-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326373,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":12},\"maxValues\":{\"id\":12},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-cca989ea-d56e-4e1e-a4ba-538ef7801997-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326373,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":13},\"maxValues\":{\"id\":13},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-14706643-f3f0-4ba9-8282-7d55bb4ecacb-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317326374,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":14},\"maxValues\":{\"id\":14},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-25801ed5-1cf2-43fa-bbdb-8898fc102e64-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326373,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":15},\"maxValues\":{\"id\":15},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-03fbeb6f-b8a5-448e-afa7-0f49fca61866-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326374,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":16},\"maxValues\":{\"id\":16},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-08361c76-870e-4ddf-9153-f67852849ec3-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326374,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":17},\"maxValues\":{\"id\":17},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-368e738e-0673-4e76-a1ff-5ba9c755396e-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326373,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":18},\"maxValues\":{\"id\":18},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-2c9a3837-e2c5-42bd-b888-f3205f4b894c-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326374,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":19},\"maxValues\":{\"id\":19},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000003.json b/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000003.json new file mode 100644 index 0000000000..14665bcd27 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000003.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317330682,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":2,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"5e6fdfee-2a82-40d1-bc83-47b3f0f3f21e"}} +{"add":{"path":"part-00001-b7be8377-b715-4234-b316-201fd2c9c142-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330578,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":20},\"maxValues\":{\"id\":20},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-719b1086-b19c-45d1-8c4c-c11db02e2e0b-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330581,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":21},\"maxValues\":{\"id\":21},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-eb0fd03c-3e47-42ed-9897-e79dd1567fb1-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330577,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":22},\"maxValues\":{\"id\":22},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-48b99dee-6d3e-4cfb-b651-4769de7f5b24-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330577,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":23},\"maxValues\":{\"id\":23},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-b873a231-8352-4bac-b6f1-b53ee738d212-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330582,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":24},\"maxValues\":{\"id\":24},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-74ed051c-b116-4947-b62c-2086bcd5bb90-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330577,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":25},\"maxValues\":{\"id\":25},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-680d9e49-afce-4a7d-bca8-b03438c2fd74-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330577,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":26},\"maxValues\":{\"id\":26},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-81c10052-aacc-4ecf-b9cf-64f81b3bd435-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330577,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":27},\"maxValues\":{\"id\":27},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-cf9cac69-932c-43bf-8e4b-fd059d519c0f-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317330577,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":28},\"maxValues\":{\"id\":28},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-43e3252c-8ac7-4c7a-bcb4-15aaf7ae95b9-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330581,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":29},\"maxValues\":{\"id\":29},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000004.json b/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000004.json new file mode 100644 index 0000000000..ed86e283d5 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000004.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317333588,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":3,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"57611e9e-5312-4502-a3ad-c0c78799773e"}} +{"add":{"path":"part-00001-5c92b4bb-af84-4066-8aeb-1e493b7147df-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333524,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":30},\"maxValues\":{\"id\":30},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-b050a084-ab25-420f-bb7b-50eb95d25e4e-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333551,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":31},\"maxValues\":{\"id\":31},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-1880b504-fcf5-4f43-92d4-c43e8dd9d7d9-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333550,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":32},\"maxValues\":{\"id\":32},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-4af9173b-8472-41cd-8772-e2bdb084c5d5-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317333524,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":33},\"maxValues\":{\"id\":33},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-89cec234-f844-4802-a786-5d9133bbe489-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333523,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":34},\"maxValues\":{\"id\":34},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-fa11b282-ec0d-4513-9baf-2b84c5f94a12-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333550,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":35},\"maxValues\":{\"id\":35},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-895723cb-0dba-4019-a2a9-e6db9a937c91-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333550,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":36},\"maxValues\":{\"id\":36},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-dca57e7a-f859-4b39-bc43-03e1061f1b4e-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333551,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":37},\"maxValues\":{\"id\":37},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-96756753-7714-4c07-a238-d5b57f42a8ce-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333523,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":38},\"maxValues\":{\"id\":38},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-9a7b861f-5d9a-41c4-b4ec-7f0d1391acfe-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333551,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":39},\"maxValues\":{\"id\":39},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000005.json b/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000005.json new file mode 100644 index 0000000000..f77fd655fe --- /dev/null +++ b/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000005.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317336099,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":4,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4819"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"203edd74-d2b2-46fe-935a-6222cfb888d4"}} +{"add":{"path":"part-00001-a24fe71c-ba35-47bb-8f3e-636d5991d5ae-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336058,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":40},\"maxValues\":{\"id\":40},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-a56ad50f-bc64-44cb-bb55-e2d177947b3d-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336079,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":41},\"maxValues\":{\"id\":41},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-95c53cf1-b472-4c34-b728-1dd7cbed8b2f-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336074,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":42},\"maxValues\":{\"id\":42},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-7e442ced-e810-44d9-9d28-3027e652a0ec-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336080,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":43},\"maxValues\":{\"id\":43},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-68248457-2fa3-407e-9de3-759b1e052b99-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336075,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":44},\"maxValues\":{\"id\":44},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-89a48ead-5bf3-4d16-aada-97c11386fcaf-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336076,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":45},\"maxValues\":{\"id\":45},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-42b618de-c46c-4888-9b48-b99493ec2983-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336070,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":46},\"maxValues\":{\"id\":46},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-4dc49305-f4f8-4ec9-9a40-8f4b3bd81324-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336055,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":47},\"maxValues\":{\"id\":47},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-d7bb17a9-223e-474b-9d78-2c745cc35a4b-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336054,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":48},\"maxValues\":{\"id\":48},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-68a79bb6-a31e-49bf-848f-2d64ceb834c0-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336079,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":49},\"maxValues\":{\"id\":49},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000006.json b/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000006.json new file mode 100644 index 0000000000..28116ba9f2 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000006.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317338700,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":5,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"9ee4b86b-1bbe-4d6e-adbd-8dd4961989fb"}} +{"add":{"path":"part-00001-a8fc5b00-29e4-4a99-961d-b0cbcc23d165-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338678,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":50},\"maxValues\":{\"id\":50},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-5a3079f8-abbc-4b5f-a1e3-340830e59222-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317338670,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":51},\"maxValues\":{\"id\":51},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-567d7b20-b6ce-4e96-b500-caa34c80f8a7-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338676,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":52},\"maxValues\":{\"id\":52},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-b794df4b-174f-468a-9de7-2aa865ba7014-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338676,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":53},\"maxValues\":{\"id\":53},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-a15406cf-c141-4f7b-b302-e4b5a145cad5-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338675,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":54},\"maxValues\":{\"id\":54},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-8a52349c-d93b-4c59-b493-13486bb5e284-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338680,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":55},\"maxValues\":{\"id\":55},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-1734b4eb-4414-4b3a-8e99-1bd099c9e6b5-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338670,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":56},\"maxValues\":{\"id\":56},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-02781f2a-6c34-42ca-80a4-e830b2eeb963-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338676,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":57},\"maxValues\":{\"id\":57},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-63ec1c21-c31c-43d4-b5c9-9c206aeeb280-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338670,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":58},\"maxValues\":{\"id\":58},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-b732e8e4-7d1b-470d-89a5-86a3f8d8bdc2-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338680,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":59},\"maxValues\":{\"id\":59},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000008.json b/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000008.json new file mode 100644 index 0000000000..ea216c1556 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000008.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317340794,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":7,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"b5902ae9-2aab-46f9-82f9-d68ae45940d7"}} +{"add":{"path":"part-00001-3b62f1d4-2a3e-4611-a55b-e9d2ace11b3c-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340752,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":70},\"maxValues\":{\"id\":70},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-9c066923-23f4-45f6-b2af-5a4ecbef1707-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340776,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":71},\"maxValues\":{\"id\":71},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-cfb48b6e-0fc7-4d6b-8ab7-c52f29f71b94-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340752,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":72},\"maxValues\":{\"id\":72},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-b21a45a5-9a53-4dfa-8327-8a82b6b283e9-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317340766,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":73},\"maxValues\":{\"id\":73},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-afa450c4-f649-4c88-817a-6d0bdfc4da6f-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340765,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":74},\"maxValues\":{\"id\":74},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-dd96cd25-394d-4873-84e7-f2f6b0eb5a67-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340776,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":75},\"maxValues\":{\"id\":75},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-3a98b7ed-8665-4bc5-8704-6745f7084cd0-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340776,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":76},\"maxValues\":{\"id\":76},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-8dfd3dc5-cf31-42fc-8c55-2ac70ce9e18d-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340782,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":77},\"maxValues\":{\"id\":77},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-97e7d287-aee3-445d-a90e-f3b2ef4bd7cd-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340765,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":78},\"maxValues\":{\"id\":78},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-db2340aa-28ff-4826-b39e-07ba516551e9-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340779,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":79},\"maxValues\":{\"id\":79},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000009.json b/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000009.json new file mode 100644 index 0000000000..1f9d5fb05b --- /dev/null +++ b/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000009.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317341714,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":8,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4819"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"5379512e-d4d3-42b5-817d-70ecf05f2385"}} +{"add":{"path":"part-00001-4e7175fd-6ffb-4b6a-946c-43aa7c439104-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341702,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":80},\"maxValues\":{\"id\":80},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-f3cc4f07-93ec-4a47-add1-b16c1149c3d9-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341667,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":81},\"maxValues\":{\"id\":81},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-07581f2d-ee98-4464-a28b-f738e88749e4-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341702,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":82},\"maxValues\":{\"id\":82},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-c9d7468d-fc14-445c-8dbd-65d616f8eb05-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341666,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":83},\"maxValues\":{\"id\":83},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-56eb2260-fb49-4138-a5c8-f0ae0949f4e2-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341670,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":84},\"maxValues\":{\"id\":84},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-5d15eeca-0fca-4986-a18e-4d86bf5ba2f6-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341670,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":85},\"maxValues\":{\"id\":85},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-3e13d733-f55d-42ac-be4f-f4400e999c29-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341702,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":86},\"maxValues\":{\"id\":86},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-22c23f1e-26d2-488e-8e07-2de6ae5fded5-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341667,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":87},\"maxValues\":{\"id\":87},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-d0debf09-5f82-4c61-8636-27e51fba37e5-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341674,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":88},\"maxValues\":{\"id\":88},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-7403e0c4-bb07-4a1b-9fca-a01523713f85-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341702,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":89},\"maxValues\":{\"id\":89},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000010.checkpoint.parquet b/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000010.checkpoint.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9df253f2f762891c53763e62e57576217a8b289a GIT binary patch literal 21615 zcmeHve{@vWedn9eNJxOjGSQn1ss0cYt8$MXeZSv*J~}DCYRg2%@)&#F)J}Zgd-s8C z{i-xzpbN&@WK1Zf3Lz{>Xcs&FL5b`75Z6szH)~Rs^*WB%OX8GzecHfw-NyBYj6pooH)6&Hv<{p$ON=68rMrSQ2) zY!-w$`36A{UP=i+lL1qsVriLp{0A?WQk6tru+9`fOL!!eZa8>2pI(Xf7#%N4vMejI z6$9Zxb1&s0pGFh|C){f0z+svEMa zOTMYw5>`PZqF7WiJ=yVmSNEvtOWSq}`z?u3L)CQKl{_09fR-&ex~)lyrW4a~45kuY z+H!-iob2#zm%64c*_!G~y5{&2VY(yvo^05jq1dkMPrj476RTyGVi`Ud^L=1CCY5a6 zOx@B+spaaLt;mz0E$fo3II2l3pcxDc!crwi@l1)xOg0_EF+4@#Lv@Mykc3QJNpmb8 z64O1&aWzx&D76(=wsee^wsfQx9qv{PT{c}`f{d2m-)A|Jqsb5^MomvLRFGOQMbnBT;2vavab%Waik0r(%t4(oN}xL~NJ25WD0PebM25%%v7FCBw57 zO>-Qk6MHB0?dVihHONEp2uOn~mZ%Xhbum(Wgc^}mMKWX z)g04dmSkHnUftK>X*Qgg`BYOK%TPSKM4RrkB-8R0hsY+rF)szB$>E;-SHF;q0;@`MRGiG}D_2)n$qJSia*CMN=nf z(|-7Xi6Ce@4q!AyJ>BNMuM=NVJyUm?Ll7ysPfuZ)h)uo))3@Q82*wQDY7Q)d5=GZN zo!aK)+J^nGFdNeLR2VkQ3G;Nrlc09MW^BTAS=_G|`8Q9rwLZa=kn+(G1aWKGwItNNaWn3uYt z8=+p;U^v9Kvwt|Z9d<+u-ACWvayW$$uNp*F7;=c>`k<;oehxB=M2JnGEm*7>g!pzSMKj4MuN{_XT3UAQ zTzXpfeb_IxUCS^u+bngYIyA{f%G2Caf8LCQb+mgGG-M$oDjrnkFb@kyY#^YgTe4}w zhkVVKv-6r)K|Csf`*?=oz@3mz_FTy9P%WbPDzcM{h^VWGtwg2>`B(%j0kPB6CJ(28 zCmW`zx=3p#tT8*%vJ9aXQA$GagMsKCad#e1uSJkz)Ic_dK3&y>4H^(i>eX&&)pZpc z$-$a@yQvrUVpGc@@NFJ>X||;`qf3hx^hlO3dkWGNb^X%e)RZn+Ob1`6Fc=SwpadE) zT?vWBVy2-|nZaJ*tP*myNo=e`@eOd8+TCk7Oj1-$V-8%^B6J%v07(0arueQ!Bv++t zn;G#=J&1&;sw$Rj*_K5ncQs}^uFCt4P8^dRJ=B4i=b6x2_UqYh*HC@c_ASM+cegg= zb%p7&EoXPM-VGnsv0_9T4S4_}K?*liMb!QfyY6-)=Kdc2eiIT$`CfJSR{wLr??;LW8Tq z3>nf3Wds>U^(;zDZYpm{K2;QF`Clw!E($c=P+&$5j9hiHpSfzjZ|ijDWAmmE3|(7w zbjN|mA(bMPBY;q(X)F;HiUphUO4nw35bibGR}};dxABIIYh;JAI&1{%@KHD+L7K=X z%V2Q|Ok750q==*luER?l8|yNW36$(tnlrjq`fj66Bm^N#bz$xBx1(+C7F2A>5AEuN z$rw7qo`clQZAMXK*Hlf0h^A^72BHc| zHUxbSxk{y!nD8b`Rwi2;xV@+<>Hr@xRW-d+W*cD;-qFn+9z&8v1$A`mmiyD)usKim znB&WKc4+?5kM)^II3}teSf8Qz4%I9M8KXS0pnJqlOVX4F{-WR2d?a{JmSh)}LVXvO zIXRXYKwOhGq#Tq92#^RsI`UNZyH_J`$iAMPx~dJOg=xZKvcJ}{jM~JpVEgF7NTlZM z;Jgm_prsqWmEE^M@JiQYS7{QePn3eNI*ow=U0slF*KLGi%l144MYiuuem%P%86GvM z;^~SfdB{wts0c4ZPh zDU|vy?8H_zgaRK%WB5qTDvZmZh%yfI6-zE1Pj`SCGKNC3UuqHb?C&qgzBISjhJ#6t zZNs`yli8&7t@+)se+_ZQheT6)cFlrSNK#k?(!Gh)jY3TGO8;eU8;S$VK-!#-z`z!{ z&AO?dIwqqcM%Bfz?Lb0CuH)WrBM({{GPmYIeW=6Wnn*kblToc0j_Z`*oyt+FYbrw~ zLWD$_sash&hav*?qDfI^BCvWc5;FZt)>_c+BL_1B3hHxQEWp&@RGQ_RvWm>R^Lz`W zVo*)CY@eVYbzu4q>L02g*J{XYn#>GaRrl-`c0jDy9B^BNXmym*FjfR6)rE##SAz^4 z#MM01ui2KL?Vr2+cpr)}lzvd3ixSmzZQJ*=?d_B1oNnyI9CXQ&?__@{W)v-T12!O% zFHCc7xV-S!lf*^;ay)4^DWuWq72q%tR`U(Os z_6zW;?5%A&ycx+Abu7Y~!i$9tc(BYgPcI>oH;J9FF^|D^y+v-1i+zz}qR7=P^VG*P z$Q7Hj+emuKl05i;i@k!RA%vhruryR!2pv9ZHV4InL3!zRH{{?ss9j|gSt>!)(g{)> ziq>pyL8n6v#WW0sAzLH+Dv$tnw2+1CFM0<7@2J=VIp`X)jt5omypZXESWycYjx0;+ z1OGIDG7a(1gpWG1>$r$fs9sCQ<_Ox+IYo+k!9#+hFj17oD2PzRsVG30jUp~9e+03p z9ccsm!lQ@4DKZjv0$4A$jVh{U$Ay+18&wQ<4X^r8bD^k^N(LyUr9LC z*p})R8kaZs3Js}t0bo;2;x`+_CtrW?$Bn(B!?AZXESdPv`r$7ESbx5yIrHEDrlnbM zTbncOPqegUPJbI+cfEiv(kf>@e-K?wFX5BZN-}?UPfMGiwRQ_jTbqU7XdMtff}i)d zZWs2o_MtDIx$2pgWx`17m@tk(U&R0OTRVjf_@Zm7MHb#@J(T(1e-4Q6x9$)=g(=%w zW#Jn9KNs!$TE8LuE!u6Zd10WH2zy#L3Eyk&%>1`+x7;nf2|DWmcMhFRt;>Y>THA!} zXwPfS_ul(LxEAyI@FxWIjgdrI2Qz@#ZUs={Ue--WMd$_0n3*@Ty z8^5!v<%_59_~^dD|9WiUUHdxsKe+WD=J?CIuXFGC{EES)3zW_SZ3hm0>CwK&9{ZOs z?RfCyQ@{V-)c^RWbl$2x;dpIGtFm3qDfI{v-IBO8Vb%pD!~CKg|(^ei48 z9J!qiZD9EHdy5CX#n;(nD|+ds+po_FOGY*f4XqsAFj8FH+S4p=-Ma5Kq1jNkuD(rZ z%)N1&kTK0Qy!qyBLg8@KrD5!))tJK= z;pRP$Hx$+iDRIrM!j~UyD4a(>r}St;G|4VJo=OFLXiCh-T8!f0c9}THL~ICJ;X?GS zQ8*W(2zt3MV_9o%4fz2Rk7UX)Uu_IqW(7={h+3WunD;{gfN+_JQYeoj@wa*)C@kZs zz2bk(4HmlZp*e-Vxxq5g6s*FJK^r5oM)QiFMxaWe5OGclH#LO}CF=^;%4PEtEP(&y z5L^mMz%=i%KLOg+B?kfF@*E7L8t5Qx;9? zhbwN0L}$lKwE}K97iL*!&_y38an#4sx1Y^qw)VVvt5CS90jqyZ`0|g_h1G!M><&cB zH5c_>|KoI^WlUZJ=K17vslq#RLWw<|0{lbKqKv}9XyGIf7)N?9n)7(7a4sFsQ5L&QW|IwjMQ(g zcpS6%H;07V`!k>X(wr|Jf*JF^Ljqh%&{KN@P4gi?jl^*)A1#pVWb=wckr8NxvN>1~ zc-hPBLVv`mQuxZ8YVPc^V^+9v`7jUo? zz-BvWDN3H_PJZog=T$;|^o}xL4S<>hzM|_cpz;c4`@j6%v1CGZ1;bf@B?UaxH~K_M zd^shSo_%{kB{zdVD09Q5^xHvNE^Z!Ui?3TeQ5@gk78my{_6OMzPtm`Fj7^96S4hr1 zi(7x~;NVD+tz+Z(z_T-ThVUh3F=TC4#8G;YB9(95de_!yZ9n+V$vw&SWOecXE{X&xDjd*4l(&)gIDo=uw1-W&IxzgIj^7U2G2adpJR>7@C@i0DMUyGLVL zzL_+SkBQsU(TL;Y@rZ{fVjY~Hh3`~314-T=LqAdYe z$A@DNzpzRIvZhe2LU%i6%Qo?Th zxj#&a?YZx77CVKm)g5A{Vs1GdB(qmn&weGC{a!G87cOQhr&s2W1Y%dJ8si^dahPgLt*^7CA^N49a*?(Dx5QnfyJ_E%%=53o)ISZ@SaK@1>VpH-DjWknON zxi8n>^+pZklL6$90>~pb#eO*xWK}VgpdPuYn$cqc>Yn@gqAvh79V%lc)ToY1&@R-V zy%vfvKznh`CDAGiu>|tPHPsBB2_+do?z=e-X)`f6ucDGmAotx|4O!a575$?C^7LgO zD@wrx@^lU4fdF#P_7s=nlLIlEfdePiV*>HyKs8rK0*E6a#JMVnixr#+;JF&W7XrW& zA>i>_Vv7lQnxRHbu#VqS&D`IIA`7wh6sBV}1_f<6efMWmxieRH?J4lQaP#F4w=Q`y z6<$C`*VDL!&d&ny(x+v?jdlW98A#s667#t|52S*GG3h01j0*IMjK z_0+!!jNljm%IP-(s4e6}nQ^pZHE0h9Xs?85C6>g=2(2bma+ngUX6hRO%$X48g<6;y zHw_72FVq0O8~{$?*X@s|NP50CwuLVc1xIIoKKF zI!AtTeKo_+1;|f@$Y+3D4(u~CJT10!#4|OBCj!J*Lc}A3mqVknA(}To$$_&U4sj?y2T>?-v6X4`I(Hu-%bZt3y3U zIa`B*+b&qsD_7BgnwqU;=uI~EWYK*BC&;Ja> z^Zbs$9!}2$QMtoUkWSYieP$n5-VZ{gCq`msNG#~4@Khi^4G7Ubm2^73uR?$K(gJQW~62V}(ZH|s&J zjJX{9%^K|E0ru-5_HjIZtTD)GXBwf9K9kOEPj?+3>&^f6mELh&^mhD1gyyRJg=Dnp3CXq^W$u6l#(i#qsfKgL^kHiUYV#fx$gIQ42l& zY-J`(=KJ!8C#r#W{#)*q&xOF}>j17cCEiL%gD+ISH%(+=knGNp~PC389;^HxaaB*;VsW4xHE)4q!s)5(q1IzJl#AJ`oj z)6@6O06hIL@FKuZ-&YO1dw*a|A@I~D4jdlgWv8DJMrVVePw+C0M zwd`Et4PwQ$Ydr4*SXbp8?FYvRFLaan)z4clQu-y@sJH~a0+foaXo6CwjV3qE^+D7T zLE^jX`Y0sQy2xes2ZgJ>9Z!(z4dB7aW4+VskX?A67ZZpb_Y$6RWdXSCqVZjSOuNqL z_cMuFGd(Cs7^*u6{{NI+%8L4BEKMHNB&Jb!ME+$g4U67cd3hZHto+U;R?TceSnv-H zB7W+P$icr^D3|bcR|plaY+{b<1__Vf;`y^#arxm+JgIJ?(L0|QAg20rshI1qh{}gV zF}HOuHxA{|yQ3H>UN;>dr<>6oy*sMj8sPEK2i_G-d$?3f`R5|>q&%v4_!RTnBa4$sw(w5|YZ`qQo$C|=NqXgAZ3gMJRA>u}5ICrh=wT|&>{ z)ObqWzC5!#)eRH9eM|V5Wqo*AA>FlA$gf7g;m^dR;qlJs)TUEes%T4<&TePJ;E>np zIXZp=pd#a!FFN>Th77;EQN!;J!>`lxC{s-SJE8CkJ{)>2p5@_*_5b641bwL>4B%Id F{vT}G{2u@S literal 0 HcmV?d00001 diff --git a/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000010.json b/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000010.json new file mode 100644 index 0000000000..c1f8c8b095 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000010.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317342605,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":9,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"ba4b200d-e0ac-4715-ad4e-bed8ef1b20cd"}} +{"add":{"path":"part-00001-55fbea2e-0788-438a-a50c-65f809acc05b-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342581,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":90},\"maxValues\":{\"id\":90},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-0d891eaf-0e01-46a6-879e-49bbca90c215-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317342581,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":91},\"maxValues\":{\"id\":91},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-569d12e9-04d5-4fe2-9554-f288047f3386-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342592,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":92},\"maxValues\":{\"id\":92},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-1bc89ec9-8092-49e1-9b1f-123ae50e3d40-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342579,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":93},\"maxValues\":{\"id\":93},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-b4a223d6-1d87-49c9-84c9-a85eece61839-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342591,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":94},\"maxValues\":{\"id\":94},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-4c3b6be7-979c-4f42-8920-efa32b751d97-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342578,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":95},\"maxValues\":{\"id\":95},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-6c224359-8995-417a-8b24-b2e530327bc6-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342581,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":96},\"maxValues\":{\"id\":96},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-2a40eb21-34d2-48ca-aaa5-55db674f56de-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342590,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":97},\"maxValues\":{\"id\":97},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-ca8256ed-98cd-460d-8de2-9f6f7f388703-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342579,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":98},\"maxValues\":{\"id\":98},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-f131fc78-c201-4e8d-b194-222b2e79778d-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342578,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":99},\"maxValues\":{\"id\":99},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000011.json b/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000011.json new file mode 100644 index 0000000000..05863f6cd0 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000011.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317349152,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":10,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"1657fc30-879d-4b0b-972a-4e3a079fdd7a"}} +{"add":{"path":"part-00001-ceaadd5e-615b-455d-8f4b-052b9c94c7b6-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349136,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":100},\"maxValues\":{\"id\":100},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-b7dba1e7-b1e5-4f02-a223-69ec7353ab45-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349123,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":101},\"maxValues\":{\"id\":101},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-f099cf4d-d418-4852-8580-091908847a66-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317349122,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":102},\"maxValues\":{\"id\":102},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-418e8d25-7316-442a-9bc8-616ed01231eb-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349136,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":103},\"maxValues\":{\"id\":103},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-7538a9c2-1ccb-4150-b162-ef8d826fe30f-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349123,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":104},\"maxValues\":{\"id\":104},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-7619f42c-5bc4-4e77-b037-f36481c8b63c-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349123,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":105},\"maxValues\":{\"id\":105},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-eb49d85f-91cc-4293-9339-a664ee905b0f-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349134,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":106},\"maxValues\":{\"id\":106},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-81e22719-7705-4703-b2dd-c4e2982217a7-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349122,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":107},\"maxValues\":{\"id\":107},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-883858d1-9df6-4b55-a2be-5b8387134617-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349122,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":108},\"maxValues\":{\"id\":108},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-07d91938-ac89-48cc-a657-6067d2d9f67e-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349137,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":109},\"maxValues\":{\"id\":109},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000012.json b/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000012.json new file mode 100644 index 0000000000..4cc44fa8e8 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000012.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317349950,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":11,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"9a035bdd-f892-4449-9c39-401f31fcada6"}} +{"add":{"path":"part-00001-f3b19100-b5b3-4e72-8658-7a937e9ed515-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349924,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":110},\"maxValues\":{\"id\":110},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-54f2324a-e97f-4def-9101-9cc10599ba06-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349919,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":111},\"maxValues\":{\"id\":111},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-3f7ca40a-6497-4208-8a1a-11062456a5a9-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349936,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":112},\"maxValues\":{\"id\":112},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-a14852b2-c743-4a4a-b9c1-0c9472c51699-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349929,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":113},\"maxValues\":{\"id\":113},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-97d06207-5584-43df-afc2-2d1738d79193-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349943,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":114},\"maxValues\":{\"id\":114},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-0d431f03-6dbf-40e7-96fc-b1ebbbe9fc65-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349922,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":115},\"maxValues\":{\"id\":115},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-af0f0232-33c8-4315-821b-8bb1323b7a26-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317349936,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":116},\"maxValues\":{\"id\":116},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-4f744428-d088-497e-afd3-0b374e453e7c-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349936,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":117},\"maxValues\":{\"id\":117},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-694064b8-137e-45cd-b2ea-e28af172a2dc-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349918,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":118},\"maxValues\":{\"id\":118},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-56073753-4c1c-4a68-9b4a-13ef5d1a75fb-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349938,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":119},\"maxValues\":{\"id\":119},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000013.json b/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000013.json new file mode 100644 index 0000000000..b2d03d3ead --- /dev/null +++ b/crates/core/tests/data_err_logs/table_b/_delta_log/00000000000000000013.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317350712,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":12,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4819"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"c3cd0fa1-9c72-4344-8225-0b787e52d5e0"}} +{"add":{"path":"part-00001-7a0d95f8-e122-4cf6-b89c-389036a9b415-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350696,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":120},\"maxValues\":{\"id\":120},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-f1f035c1-bf0f-485c-950d-c81d0d2aa8a2-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350706,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":121},\"maxValues\":{\"id\":121},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-d7a51e45-70f3-4379-819b-341951abefff-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350691,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":122},\"maxValues\":{\"id\":122},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-4828722c-5799-4be1-ace1-14bd7f477dbf-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350691,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":123},\"maxValues\":{\"id\":123},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-f3c3c72e-5d71-4dc9-9e15-342f1d6cb6cc-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350701,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":124},\"maxValues\":{\"id\":124},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-e8d74ede-8876-4f55-8e9f-1bbde0d07a35-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350696,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":125},\"maxValues\":{\"id\":125},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-91052146-2292-45c3-b57e-1fd2dd6be6ed-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350692,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":126},\"maxValues\":{\"id\":126},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-9d203964-9f4c-4c84-ad77-9ba305bb6572-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350706,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":127},\"maxValues\":{\"id\":127},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-8362228b-acf6-4937-875b-26c013c342e1-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350690,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":128},\"maxValues\":{\"id\":128},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-6f57658e-1953-4b59-b504-27c9e8c5cc3b-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350677,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":129},\"maxValues\":{\"id\":129},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_b/_delta_log/_last_checkpoint b/crates/core/tests/data_err_logs/table_b/_delta_log/_last_checkpoint new file mode 100644 index 0000000000..604c2b31a2 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_b/_delta_log/_last_checkpoint @@ -0,0 +1 @@ +{"version":10,"size":102,"sizeInBytes":21615,"numOfAddFiles":100,"checkpointSchema":{"type":"struct","fields":[{"name":"txn","type":{"type":"struct","fields":[{"name":"appId","type":"string","nullable":true,"metadata":{}},{"name":"version","type":"long","nullable":true,"metadata":{}},{"name":"lastUpdated","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"add","type":{"type":"struct","fields":[{"name":"path","type":"string","nullable":true,"metadata":{}},{"name":"partitionValues","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"size","type":"long","nullable":true,"metadata":{}},{"name":"modificationTime","type":"long","nullable":true,"metadata":{}},{"name":"dataChange","type":"boolean","nullable":true,"metadata":{}},{"name":"tags","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"deletionVector","type":{"type":"struct","fields":[{"name":"storageType","type":"string","nullable":true,"metadata":{}},{"name":"pathOrInlineDv","type":"string","nullable":true,"metadata":{}},{"name":"offset","type":"integer","nullable":true,"metadata":{}},{"name":"sizeInBytes","type":"integer","nullable":true,"metadata":{}},{"name":"cardinality","type":"long","nullable":true,"metadata":{}},{"name":"maxRowIndex","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"baseRowId","type":"long","nullable":true,"metadata":{}},{"name":"defaultRowCommitVersion","type":"long","nullable":true,"metadata":{}},{"name":"clusteringProvider","type":"string","nullable":true,"metadata":{}},{"name":"stats","type":"string","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"remove","type":{"type":"struct","fields":[{"name":"path","type":"string","nullable":true,"metadata":{}},{"name":"deletionTimestamp","type":"long","nullable":true,"metadata":{}},{"name":"dataChange","type":"boolean","nullable":true,"metadata":{}},{"name":"extendedFileMetadata","type":"boolean","nullable":true,"metadata":{}},{"name":"partitionValues","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"size","type":"long","nullable":true,"metadata":{}},{"name":"deletionVector","type":{"type":"struct","fields":[{"name":"storageType","type":"string","nullable":true,"metadata":{}},{"name":"pathOrInlineDv","type":"string","nullable":true,"metadata":{}},{"name":"offset","type":"integer","nullable":true,"metadata":{}},{"name":"sizeInBytes","type":"integer","nullable":true,"metadata":{}},{"name":"cardinality","type":"long","nullable":true,"metadata":{}},{"name":"maxRowIndex","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"baseRowId","type":"long","nullable":true,"metadata":{}},{"name":"defaultRowCommitVersion","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"metaData","type":{"type":"struct","fields":[{"name":"id","type":"string","nullable":true,"metadata":{}},{"name":"name","type":"string","nullable":true,"metadata":{}},{"name":"description","type":"string","nullable":true,"metadata":{}},{"name":"format","type":{"type":"struct","fields":[{"name":"provider","type":"string","nullable":true,"metadata":{}},{"name":"options","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"schemaString","type":"string","nullable":true,"metadata":{}},{"name":"partitionColumns","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}},{"name":"configuration","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"createdTime","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"protocol","type":{"type":"struct","fields":[{"name":"minReaderVersion","type":"integer","nullable":true,"metadata":{}},{"name":"minWriterVersion","type":"integer","nullable":true,"metadata":{}},{"name":"readerFeatures","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}},{"name":"writerFeatures","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"domainMetadata","type":{"type":"struct","fields":[{"name":"domain","type":"string","nullable":true,"metadata":{}},{"name":"configuration","type":"string","nullable":true,"metadata":{}},{"name":"removed","type":"boolean","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"checksum":"94a578f92841fa7ba9cdee96b5905fdb"} diff --git a/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000000.json b/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000000.json new file mode 100644 index 0000000000..0316f09771 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1742317262289,"operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","clusterBy":"[]","description":null,"isManaged":"false","properties":"{}"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"c0983a7a-aeca-4ba8-a509-d5dbc71a10de"}} +{"metaData":{"id":"5f54ef5f-e511-4114-b6e5-f6c206c068b6","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1742317261939}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":1}} diff --git a/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000001.json b/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000001.json new file mode 100644 index 0000000000..23002c6f58 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000001.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317316973,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4819"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"a1c49e36-8fcd-46e6-8ede-5b8560f7ec3b"}} +{"add":{"path":"part-00001-665397f5-1435-4478-a598-ca226c99ffcf-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316825,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":0},\"maxValues\":{\"id\":0},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-fd50becf-074e-4a1f-985b-01529e9f7b03-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316820,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":1},\"maxValues\":{\"id\":1},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-4fc1f70f-9daa-46e6-83b5-ea8144d4a96d-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316822,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":2},\"maxValues\":{\"id\":2},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-efb0808c-3b7f-4a4d-bc36-daa91c074b5b-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316825,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":3},\"maxValues\":{\"id\":3},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-c8664e02-01fe-4c2d-8eba-ae84012d7aad-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316825,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":4},\"maxValues\":{\"id\":4},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-7446ef47-3110-4c3f-a2d0-0c71bafc893a-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316820,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":5},\"maxValues\":{\"id\":5},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-ee372cff-6aae-4979-970b-88cc154a31bd-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316822,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":6},\"maxValues\":{\"id\":6},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-9ea59fdf-fc26-4650-a282-9c2cc1906c7c-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316823,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":7},\"maxValues\":{\"id\":7},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-d50ebfbf-e534-4bc8-b63d-437f6029da6e-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316823,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":8},\"maxValues\":{\"id\":8},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-96e67376-3d15-4895-bd5d-5e0a325bcb83-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316823,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":9},\"maxValues\":{\"id\":9},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000002.json b/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000002.json new file mode 100644 index 0000000000..86e3c7470c --- /dev/null +++ b/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000002.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317326453,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":1,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"ac59851b-981e-4e82-96ea-36a543cfe254"}} +{"add":{"path":"part-00001-6491d41d-d498-4a89-a291-92d964035606-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326374,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":10},\"maxValues\":{\"id\":10},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-d359921a-3cb1-454d-8aa0-ac5c830fcdc5-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326373,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":11},\"maxValues\":{\"id\":11},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-b2aae64d-1fab-4106-bc87-2454e945dada-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326373,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":12},\"maxValues\":{\"id\":12},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-cca989ea-d56e-4e1e-a4ba-538ef7801997-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326373,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":13},\"maxValues\":{\"id\":13},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-14706643-f3f0-4ba9-8282-7d55bb4ecacb-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317326374,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":14},\"maxValues\":{\"id\":14},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-25801ed5-1cf2-43fa-bbdb-8898fc102e64-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326373,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":15},\"maxValues\":{\"id\":15},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-03fbeb6f-b8a5-448e-afa7-0f49fca61866-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326374,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":16},\"maxValues\":{\"id\":16},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-08361c76-870e-4ddf-9153-f67852849ec3-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326374,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":17},\"maxValues\":{\"id\":17},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-368e738e-0673-4e76-a1ff-5ba9c755396e-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326373,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":18},\"maxValues\":{\"id\":18},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-2c9a3837-e2c5-42bd-b888-f3205f4b894c-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326374,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":19},\"maxValues\":{\"id\":19},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000003.json b/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000003.json new file mode 100644 index 0000000000..14665bcd27 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000003.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317330682,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":2,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"5e6fdfee-2a82-40d1-bc83-47b3f0f3f21e"}} +{"add":{"path":"part-00001-b7be8377-b715-4234-b316-201fd2c9c142-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330578,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":20},\"maxValues\":{\"id\":20},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-719b1086-b19c-45d1-8c4c-c11db02e2e0b-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330581,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":21},\"maxValues\":{\"id\":21},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-eb0fd03c-3e47-42ed-9897-e79dd1567fb1-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330577,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":22},\"maxValues\":{\"id\":22},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-48b99dee-6d3e-4cfb-b651-4769de7f5b24-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330577,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":23},\"maxValues\":{\"id\":23},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-b873a231-8352-4bac-b6f1-b53ee738d212-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330582,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":24},\"maxValues\":{\"id\":24},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-74ed051c-b116-4947-b62c-2086bcd5bb90-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330577,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":25},\"maxValues\":{\"id\":25},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-680d9e49-afce-4a7d-bca8-b03438c2fd74-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330577,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":26},\"maxValues\":{\"id\":26},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-81c10052-aacc-4ecf-b9cf-64f81b3bd435-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330577,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":27},\"maxValues\":{\"id\":27},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-cf9cac69-932c-43bf-8e4b-fd059d519c0f-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317330577,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":28},\"maxValues\":{\"id\":28},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-43e3252c-8ac7-4c7a-bcb4-15aaf7ae95b9-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330581,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":29},\"maxValues\":{\"id\":29},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000004.json b/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000004.json new file mode 100644 index 0000000000..ed86e283d5 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000004.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317333588,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":3,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"57611e9e-5312-4502-a3ad-c0c78799773e"}} +{"add":{"path":"part-00001-5c92b4bb-af84-4066-8aeb-1e493b7147df-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333524,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":30},\"maxValues\":{\"id\":30},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-b050a084-ab25-420f-bb7b-50eb95d25e4e-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333551,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":31},\"maxValues\":{\"id\":31},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-1880b504-fcf5-4f43-92d4-c43e8dd9d7d9-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333550,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":32},\"maxValues\":{\"id\":32},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-4af9173b-8472-41cd-8772-e2bdb084c5d5-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317333524,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":33},\"maxValues\":{\"id\":33},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-89cec234-f844-4802-a786-5d9133bbe489-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333523,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":34},\"maxValues\":{\"id\":34},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-fa11b282-ec0d-4513-9baf-2b84c5f94a12-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333550,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":35},\"maxValues\":{\"id\":35},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-895723cb-0dba-4019-a2a9-e6db9a937c91-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333550,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":36},\"maxValues\":{\"id\":36},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-dca57e7a-f859-4b39-bc43-03e1061f1b4e-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333551,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":37},\"maxValues\":{\"id\":37},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-96756753-7714-4c07-a238-d5b57f42a8ce-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333523,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":38},\"maxValues\":{\"id\":38},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-9a7b861f-5d9a-41c4-b4ec-7f0d1391acfe-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333551,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":39},\"maxValues\":{\"id\":39},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000005.json b/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000005.json new file mode 100644 index 0000000000..f77fd655fe --- /dev/null +++ b/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000005.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317336099,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":4,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4819"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"203edd74-d2b2-46fe-935a-6222cfb888d4"}} +{"add":{"path":"part-00001-a24fe71c-ba35-47bb-8f3e-636d5991d5ae-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336058,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":40},\"maxValues\":{\"id\":40},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-a56ad50f-bc64-44cb-bb55-e2d177947b3d-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336079,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":41},\"maxValues\":{\"id\":41},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-95c53cf1-b472-4c34-b728-1dd7cbed8b2f-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336074,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":42},\"maxValues\":{\"id\":42},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-7e442ced-e810-44d9-9d28-3027e652a0ec-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336080,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":43},\"maxValues\":{\"id\":43},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-68248457-2fa3-407e-9de3-759b1e052b99-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336075,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":44},\"maxValues\":{\"id\":44},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-89a48ead-5bf3-4d16-aada-97c11386fcaf-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336076,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":45},\"maxValues\":{\"id\":45},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-42b618de-c46c-4888-9b48-b99493ec2983-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336070,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":46},\"maxValues\":{\"id\":46},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-4dc49305-f4f8-4ec9-9a40-8f4b3bd81324-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336055,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":47},\"maxValues\":{\"id\":47},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-d7bb17a9-223e-474b-9d78-2c745cc35a4b-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336054,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":48},\"maxValues\":{\"id\":48},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-68a79bb6-a31e-49bf-848f-2d64ceb834c0-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336079,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":49},\"maxValues\":{\"id\":49},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000006.json b/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000006.json new file mode 100644 index 0000000000..28116ba9f2 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000006.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317338700,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":5,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"9ee4b86b-1bbe-4d6e-adbd-8dd4961989fb"}} +{"add":{"path":"part-00001-a8fc5b00-29e4-4a99-961d-b0cbcc23d165-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338678,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":50},\"maxValues\":{\"id\":50},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-5a3079f8-abbc-4b5f-a1e3-340830e59222-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317338670,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":51},\"maxValues\":{\"id\":51},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-567d7b20-b6ce-4e96-b500-caa34c80f8a7-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338676,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":52},\"maxValues\":{\"id\":52},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-b794df4b-174f-468a-9de7-2aa865ba7014-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338676,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":53},\"maxValues\":{\"id\":53},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-a15406cf-c141-4f7b-b302-e4b5a145cad5-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338675,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":54},\"maxValues\":{\"id\":54},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-8a52349c-d93b-4c59-b493-13486bb5e284-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338680,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":55},\"maxValues\":{\"id\":55},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-1734b4eb-4414-4b3a-8e99-1bd099c9e6b5-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338670,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":56},\"maxValues\":{\"id\":56},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-02781f2a-6c34-42ca-80a4-e830b2eeb963-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338676,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":57},\"maxValues\":{\"id\":57},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-63ec1c21-c31c-43d4-b5c9-9c206aeeb280-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338670,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":58},\"maxValues\":{\"id\":58},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-b732e8e4-7d1b-470d-89a5-86a3f8d8bdc2-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338680,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":59},\"maxValues\":{\"id\":59},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000007.json b/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000007.json new file mode 100644 index 0000000000..956c8b508c --- /dev/null +++ b/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000007.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317339658,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":6,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4819"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"63c885b0-74eb-4075-a02a-a43b8202b3f8"}} +{"add":{"path":"part-00001-f697bc51-b77f-4234-938a-5f85478cedec-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339621,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":60},\"maxValues\":{\"id\":60},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-839f210e-cf84-4c5c-b185-fd2fe2b5ee6f-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339491,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":61},\"maxValues\":{\"id\":61},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-9ebe1c22-87a1-4f37-a695-77658c3e70a8-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339491,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":62},\"maxValues\":{\"id\":62},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-01b2687c-45e4-484c-b1d6-80e06b5b5d11-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339490,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":63},\"maxValues\":{\"id\":63},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-05662cc4-6a79-4204-aec1-2311a44d8c74-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339511,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":64},\"maxValues\":{\"id\":64},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-67f1b806-ef5f-4f8a-890b-b3b5ad1d234c-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339490,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":65},\"maxValues\":{\"id\":65},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-37318455-4128-4e1e-9ab7-5c587ac9fde0-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339621,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":66},\"maxValues\":{\"id\":66},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-c5d68934-1f5a-40c4-b5be-1233eb15378a-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339511,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":67},\"maxValues\":{\"id\":67},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-41e40903-13b6-4465-aa3c-bd8cb5e52b18-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339621,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":68},\"maxValues\":{\"id\":68},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-110c626e-ea13-4204-8cae-a3183d89a4b7-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339621,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":69},\"maxValues\":{\"id\":69},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000008.json b/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000008.json new file mode 100644 index 0000000000..ea216c1556 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000008.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317340794,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":7,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"b5902ae9-2aab-46f9-82f9-d68ae45940d7"}} +{"add":{"path":"part-00001-3b62f1d4-2a3e-4611-a55b-e9d2ace11b3c-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340752,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":70},\"maxValues\":{\"id\":70},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-9c066923-23f4-45f6-b2af-5a4ecbef1707-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340776,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":71},\"maxValues\":{\"id\":71},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-cfb48b6e-0fc7-4d6b-8ab7-c52f29f71b94-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340752,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":72},\"maxValues\":{\"id\":72},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-b21a45a5-9a53-4dfa-8327-8a82b6b283e9-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317340766,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":73},\"maxValues\":{\"id\":73},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-afa450c4-f649-4c88-817a-6d0bdfc4da6f-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340765,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":74},\"maxValues\":{\"id\":74},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-dd96cd25-394d-4873-84e7-f2f6b0eb5a67-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340776,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":75},\"maxValues\":{\"id\":75},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-3a98b7ed-8665-4bc5-8704-6745f7084cd0-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340776,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":76},\"maxValues\":{\"id\":76},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-8dfd3dc5-cf31-42fc-8c55-2ac70ce9e18d-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340782,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":77},\"maxValues\":{\"id\":77},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-97e7d287-aee3-445d-a90e-f3b2ef4bd7cd-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340765,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":78},\"maxValues\":{\"id\":78},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-db2340aa-28ff-4826-b39e-07ba516551e9-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340779,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":79},\"maxValues\":{\"id\":79},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000009.json b/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000009.json new file mode 100644 index 0000000000..1f9d5fb05b --- /dev/null +++ b/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000009.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317341714,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":8,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4819"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"5379512e-d4d3-42b5-817d-70ecf05f2385"}} +{"add":{"path":"part-00001-4e7175fd-6ffb-4b6a-946c-43aa7c439104-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341702,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":80},\"maxValues\":{\"id\":80},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-f3cc4f07-93ec-4a47-add1-b16c1149c3d9-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341667,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":81},\"maxValues\":{\"id\":81},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-07581f2d-ee98-4464-a28b-f738e88749e4-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341702,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":82},\"maxValues\":{\"id\":82},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-c9d7468d-fc14-445c-8dbd-65d616f8eb05-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341666,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":83},\"maxValues\":{\"id\":83},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-56eb2260-fb49-4138-a5c8-f0ae0949f4e2-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341670,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":84},\"maxValues\":{\"id\":84},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-5d15eeca-0fca-4986-a18e-4d86bf5ba2f6-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341670,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":85},\"maxValues\":{\"id\":85},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-3e13d733-f55d-42ac-be4f-f4400e999c29-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341702,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":86},\"maxValues\":{\"id\":86},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-22c23f1e-26d2-488e-8e07-2de6ae5fded5-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341667,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":87},\"maxValues\":{\"id\":87},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-d0debf09-5f82-4c61-8636-27e51fba37e5-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341674,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":88},\"maxValues\":{\"id\":88},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-7403e0c4-bb07-4a1b-9fca-a01523713f85-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341702,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":89},\"maxValues\":{\"id\":89},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000010.checkpoint.parquet b/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000010.checkpoint.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9df253f2f762891c53763e62e57576217a8b289a GIT binary patch literal 21615 zcmeHve{@vWedn9eNJxOjGSQn1ss0cYt8$MXeZSv*J~}DCYRg2%@)&#F)J}Zgd-s8C z{i-xzpbN&@WK1Zf3Lz{>Xcs&FL5b`75Z6szH)~Rs^*WB%OX8GzecHfw-NyBYj6pooH)6&Hv<{p$ON=68rMrSQ2) zY!-w$`36A{UP=i+lL1qsVriLp{0A?WQk6tru+9`fOL!!eZa8>2pI(Xf7#%N4vMejI z6$9Zxb1&s0pGFh|C){f0z+svEMa zOTMYw5>`PZqF7WiJ=yVmSNEvtOWSq}`z?u3L)CQKl{_09fR-&ex~)lyrW4a~45kuY z+H!-iob2#zm%64c*_!G~y5{&2VY(yvo^05jq1dkMPrj476RTyGVi`Ud^L=1CCY5a6 zOx@B+spaaLt;mz0E$fo3II2l3pcxDc!crwi@l1)xOg0_EF+4@#Lv@Mykc3QJNpmb8 z64O1&aWzx&D76(=wsee^wsfQx9qv{PT{c}`f{d2m-)A|Jqsb5^MomvLRFGOQMbnBT;2vavab%Waik0r(%t4(oN}xL~NJ25WD0PebM25%%v7FCBw57 zO>-Qk6MHB0?dVihHONEp2uOn~mZ%Xhbum(Wgc^}mMKWX z)g04dmSkHnUftK>X*Qgg`BYOK%TPSKM4RrkB-8R0hsY+rF)szB$>E;-SHF;q0;@`MRGiG}D_2)n$qJSia*CMN=nf z(|-7Xi6Ce@4q!AyJ>BNMuM=NVJyUm?Ll7ysPfuZ)h)uo))3@Q82*wQDY7Q)d5=GZN zo!aK)+J^nGFdNeLR2VkQ3G;Nrlc09MW^BTAS=_G|`8Q9rwLZa=kn+(G1aWKGwItNNaWn3uYt z8=+p;U^v9Kvwt|Z9d<+u-ACWvayW$$uNp*F7;=c>`k<;oehxB=M2JnGEm*7>g!pzSMKj4MuN{_XT3UAQ zTzXpfeb_IxUCS^u+bngYIyA{f%G2Caf8LCQb+mgGG-M$oDjrnkFb@kyY#^YgTe4}w zhkVVKv-6r)K|Csf`*?=oz@3mz_FTy9P%WbPDzcM{h^VWGtwg2>`B(%j0kPB6CJ(28 zCmW`zx=3p#tT8*%vJ9aXQA$GagMsKCad#e1uSJkz)Ic_dK3&y>4H^(i>eX&&)pZpc z$-$a@yQvrUVpGc@@NFJ>X||;`qf3hx^hlO3dkWGNb^X%e)RZn+Ob1`6Fc=SwpadE) zT?vWBVy2-|nZaJ*tP*myNo=e`@eOd8+TCk7Oj1-$V-8%^B6J%v07(0arueQ!Bv++t zn;G#=J&1&;sw$Rj*_K5ncQs}^uFCt4P8^dRJ=B4i=b6x2_UqYh*HC@c_ASM+cegg= zb%p7&EoXPM-VGnsv0_9T4S4_}K?*liMb!QfyY6-)=Kdc2eiIT$`CfJSR{wLr??;LW8Tq z3>nf3Wds>U^(;zDZYpm{K2;QF`Clw!E($c=P+&$5j9hiHpSfzjZ|ijDWAmmE3|(7w zbjN|mA(bMPBY;q(X)F;HiUphUO4nw35bibGR}};dxABIIYh;JAI&1{%@KHD+L7K=X z%V2Q|Ok750q==*luER?l8|yNW36$(tnlrjq`fj66Bm^N#bz$xBx1(+C7F2A>5AEuN z$rw7qo`clQZAMXK*Hlf0h^A^72BHc| zHUxbSxk{y!nD8b`Rwi2;xV@+<>Hr@xRW-d+W*cD;-qFn+9z&8v1$A`mmiyD)usKim znB&WKc4+?5kM)^II3}teSf8Qz4%I9M8KXS0pnJqlOVX4F{-WR2d?a{JmSh)}LVXvO zIXRXYKwOhGq#Tq92#^RsI`UNZyH_J`$iAMPx~dJOg=xZKvcJ}{jM~JpVEgF7NTlZM z;Jgm_prsqWmEE^M@JiQYS7{QePn3eNI*ow=U0slF*KLGi%l144MYiuuem%P%86GvM z;^~SfdB{wts0c4ZPh zDU|vy?8H_zgaRK%WB5qTDvZmZh%yfI6-zE1Pj`SCGKNC3UuqHb?C&qgzBISjhJ#6t zZNs`yli8&7t@+)se+_ZQheT6)cFlrSNK#k?(!Gh)jY3TGO8;eU8;S$VK-!#-z`z!{ z&AO?dIwqqcM%Bfz?Lb0CuH)WrBM({{GPmYIeW=6Wnn*kblToc0j_Z`*oyt+FYbrw~ zLWD$_sash&hav*?qDfI^BCvWc5;FZt)>_c+BL_1B3hHxQEWp&@RGQ_RvWm>R^Lz`W zVo*)CY@eVYbzu4q>L02g*J{XYn#>GaRrl-`c0jDy9B^BNXmym*FjfR6)rE##SAz^4 z#MM01ui2KL?Vr2+cpr)}lzvd3ixSmzZQJ*=?d_B1oNnyI9CXQ&?__@{W)v-T12!O% zFHCc7xV-S!lf*^;ay)4^DWuWq72q%tR`U(Os z_6zW;?5%A&ycx+Abu7Y~!i$9tc(BYgPcI>oH;J9FF^|D^y+v-1i+zz}qR7=P^VG*P z$Q7Hj+emuKl05i;i@k!RA%vhruryR!2pv9ZHV4InL3!zRH{{?ss9j|gSt>!)(g{)> ziq>pyL8n6v#WW0sAzLH+Dv$tnw2+1CFM0<7@2J=VIp`X)jt5omypZXESWycYjx0;+ z1OGIDG7a(1gpWG1>$r$fs9sCQ<_Ox+IYo+k!9#+hFj17oD2PzRsVG30jUp~9e+03p z9ccsm!lQ@4DKZjv0$4A$jVh{U$Ay+18&wQ<4X^r8bD^k^N(LyUr9LC z*p})R8kaZs3Js}t0bo;2;x`+_CtrW?$Bn(B!?AZXESdPv`r$7ESbx5yIrHEDrlnbM zTbncOPqegUPJbI+cfEiv(kf>@e-K?wFX5BZN-}?UPfMGiwRQ_jTbqU7XdMtff}i)d zZWs2o_MtDIx$2pgWx`17m@tk(U&R0OTRVjf_@Zm7MHb#@J(T(1e-4Q6x9$)=g(=%w zW#Jn9KNs!$TE8LuE!u6Zd10WH2zy#L3Eyk&%>1`+x7;nf2|DWmcMhFRt;>Y>THA!} zXwPfS_ul(LxEAyI@FxWIjgdrI2Qz@#ZUs={Ue--WMd$_0n3*@Ty z8^5!v<%_59_~^dD|9WiUUHdxsKe+WD=J?CIuXFGC{EES)3zW_SZ3hm0>CwK&9{ZOs z?RfCyQ@{V-)c^RWbl$2x;dpIGtFm3qDfI{v-IBO8Vb%pD!~CKg|(^ei48 z9J!qiZD9EHdy5CX#n;(nD|+ds+po_FOGY*f4XqsAFj8FH+S4p=-Ma5Kq1jNkuD(rZ z%)N1&kTK0Qy!qyBLg8@KrD5!))tJK= z;pRP$Hx$+iDRIrM!j~UyD4a(>r}St;G|4VJo=OFLXiCh-T8!f0c9}THL~ICJ;X?GS zQ8*W(2zt3MV_9o%4fz2Rk7UX)Uu_IqW(7={h+3WunD;{gfN+_JQYeoj@wa*)C@kZs zz2bk(4HmlZp*e-Vxxq5g6s*FJK^r5oM)QiFMxaWe5OGclH#LO}CF=^;%4PEtEP(&y z5L^mMz%=i%KLOg+B?kfF@*E7L8t5Qx;9? zhbwN0L}$lKwE}K97iL*!&_y38an#4sx1Y^qw)VVvt5CS90jqyZ`0|g_h1G!M><&cB zH5c_>|KoI^WlUZJ=K17vslq#RLWw<|0{lbKqKv}9XyGIf7)N?9n)7(7a4sFsQ5L&QW|IwjMQ(g zcpS6%H;07V`!k>X(wr|Jf*JF^Ljqh%&{KN@P4gi?jl^*)A1#pVWb=wckr8NxvN>1~ zc-hPBLVv`mQuxZ8YVPc^V^+9v`7jUo? zz-BvWDN3H_PJZog=T$;|^o}xL4S<>hzM|_cpz;c4`@j6%v1CGZ1;bf@B?UaxH~K_M zd^shSo_%{kB{zdVD09Q5^xHvNE^Z!Ui?3TeQ5@gk78my{_6OMzPtm`Fj7^96S4hr1 zi(7x~;NVD+tz+Z(z_T-ThVUh3F=TC4#8G;YB9(95de_!yZ9n+V$vw&SWOecXE{X&xDjd*4l(&)gIDo=uw1-W&IxzgIj^7U2G2adpJR>7@C@i0DMUyGLVL zzL_+SkBQsU(TL;Y@rZ{fVjY~Hh3`~314-T=LqAdYe z$A@DNzpzRIvZhe2LU%i6%Qo?Th zxj#&a?YZx77CVKm)g5A{Vs1GdB(qmn&weGC{a!G87cOQhr&s2W1Y%dJ8si^dahPgLt*^7CA^N49a*?(Dx5QnfyJ_E%%=53o)ISZ@SaK@1>VpH-DjWknON zxi8n>^+pZklL6$90>~pb#eO*xWK}VgpdPuYn$cqc>Yn@gqAvh79V%lc)ToY1&@R-V zy%vfvKznh`CDAGiu>|tPHPsBB2_+do?z=e-X)`f6ucDGmAotx|4O!a575$?C^7LgO zD@wrx@^lU4fdF#P_7s=nlLIlEfdePiV*>HyKs8rK0*E6a#JMVnixr#+;JF&W7XrW& zA>i>_Vv7lQnxRHbu#VqS&D`IIA`7wh6sBV}1_f<6efMWmxieRH?J4lQaP#F4w=Q`y z6<$C`*VDL!&d&ny(x+v?jdlW98A#s667#t|52S*GG3h01j0*IMjK z_0+!!jNljm%IP-(s4e6}nQ^pZHE0h9Xs?85C6>g=2(2bma+ngUX6hRO%$X48g<6;y zHw_72FVq0O8~{$?*X@s|NP50CwuLVc1xIIoKKF zI!AtTeKo_+1;|f@$Y+3D4(u~CJT10!#4|OBCj!J*Lc}A3mqVknA(}To$$_&U4sj?y2T>?-v6X4`I(Hu-%bZt3y3U zIa`B*+b&qsD_7BgnwqU;=uI~EWYK*BC&;Ja> z^Zbs$9!}2$QMtoUkWSYieP$n5-VZ{gCq`msNG#~4@Khi^4G7Ubm2^73uR?$K(gJQW~62V}(ZH|s&J zjJX{9%^K|E0ru-5_HjIZtTD)GXBwf9K9kOEPj?+3>&^f6mELh&^mhD1gyyRJg=Dnp3CXq^W$u6l#(i#qsfKgL^kHiUYV#fx$gIQ42l& zY-J`(=KJ!8C#r#W{#)*q&xOF}>j17cCEiL%gD+ISH%(+=knGNp~PC389;^HxaaB*;VsW4xHE)4q!s)5(q1IzJl#AJ`oj z)6@6O06hIL@FKuZ-&YO1dw*a|A@I~D4jdlgWv8DJMrVVePw+C0M zwd`Et4PwQ$Ydr4*SXbp8?FYvRFLaan)z4clQu-y@sJH~a0+foaXo6CwjV3qE^+D7T zLE^jX`Y0sQy2xes2ZgJ>9Z!(z4dB7aW4+VskX?A67ZZpb_Y$6RWdXSCqVZjSOuNqL z_cMuFGd(Cs7^*u6{{NI+%8L4BEKMHNB&Jb!ME+$g4U67cd3hZHto+U;R?TceSnv-H zB7W+P$icr^D3|bcR|plaY+{b<1__Vf;`y^#arxm+JgIJ?(L0|QAg20rshI1qh{}gV zF}HOuHxA{|yQ3H>UN;>dr<>6oy*sMj8sPEK2i_G-d$?3f`R5|>q&%v4_!RTnBa4$sw(w5|YZ`qQo$C|=NqXgAZ3gMJRA>u}5ICrh=wT|&>{ z)ObqWzC5!#)eRH9eM|V5Wqo*AA>FlA$gf7g;m^dR;qlJs)TUEes%T4<&TePJ;E>np zIXZp=pd#a!FFN>Th77;EQN!;J!>`lxC{s-SJE8CkJ{)>2p5@_*_5b641bwL>4B%Id F{vT}G{2u@S literal 0 HcmV?d00001 diff --git a/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000010.json b/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000010.json new file mode 100644 index 0000000000..c1f8c8b095 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000010.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317342605,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":9,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"ba4b200d-e0ac-4715-ad4e-bed8ef1b20cd"}} +{"add":{"path":"part-00001-55fbea2e-0788-438a-a50c-65f809acc05b-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342581,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":90},\"maxValues\":{\"id\":90},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-0d891eaf-0e01-46a6-879e-49bbca90c215-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317342581,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":91},\"maxValues\":{\"id\":91},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-569d12e9-04d5-4fe2-9554-f288047f3386-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342592,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":92},\"maxValues\":{\"id\":92},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-1bc89ec9-8092-49e1-9b1f-123ae50e3d40-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342579,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":93},\"maxValues\":{\"id\":93},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-b4a223d6-1d87-49c9-84c9-a85eece61839-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342591,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":94},\"maxValues\":{\"id\":94},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-4c3b6be7-979c-4f42-8920-efa32b751d97-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342578,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":95},\"maxValues\":{\"id\":95},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-6c224359-8995-417a-8b24-b2e530327bc6-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342581,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":96},\"maxValues\":{\"id\":96},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-2a40eb21-34d2-48ca-aaa5-55db674f56de-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342590,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":97},\"maxValues\":{\"id\":97},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-ca8256ed-98cd-460d-8de2-9f6f7f388703-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342579,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":98},\"maxValues\":{\"id\":98},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-f131fc78-c201-4e8d-b194-222b2e79778d-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342578,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":99},\"maxValues\":{\"id\":99},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000011.json b/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000011.json new file mode 100644 index 0000000000..05863f6cd0 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000011.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317349152,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":10,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"1657fc30-879d-4b0b-972a-4e3a079fdd7a"}} +{"add":{"path":"part-00001-ceaadd5e-615b-455d-8f4b-052b9c94c7b6-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349136,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":100},\"maxValues\":{\"id\":100},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-b7dba1e7-b1e5-4f02-a223-69ec7353ab45-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349123,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":101},\"maxValues\":{\"id\":101},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-f099cf4d-d418-4852-8580-091908847a66-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317349122,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":102},\"maxValues\":{\"id\":102},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-418e8d25-7316-442a-9bc8-616ed01231eb-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349136,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":103},\"maxValues\":{\"id\":103},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-7538a9c2-1ccb-4150-b162-ef8d826fe30f-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349123,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":104},\"maxValues\":{\"id\":104},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-7619f42c-5bc4-4e77-b037-f36481c8b63c-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349123,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":105},\"maxValues\":{\"id\":105},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-eb49d85f-91cc-4293-9339-a664ee905b0f-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349134,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":106},\"maxValues\":{\"id\":106},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-81e22719-7705-4703-b2dd-c4e2982217a7-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349122,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":107},\"maxValues\":{\"id\":107},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-883858d1-9df6-4b55-a2be-5b8387134617-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349122,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":108},\"maxValues\":{\"id\":108},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-07d91938-ac89-48cc-a657-6067d2d9f67e-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349137,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":109},\"maxValues\":{\"id\":109},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000013.json b/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000013.json new file mode 100644 index 0000000000..b2d03d3ead --- /dev/null +++ b/crates/core/tests/data_err_logs/table_c/_delta_log/00000000000000000013.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317350712,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":12,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4819"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"c3cd0fa1-9c72-4344-8225-0b787e52d5e0"}} +{"add":{"path":"part-00001-7a0d95f8-e122-4cf6-b89c-389036a9b415-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350696,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":120},\"maxValues\":{\"id\":120},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-f1f035c1-bf0f-485c-950d-c81d0d2aa8a2-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350706,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":121},\"maxValues\":{\"id\":121},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-d7a51e45-70f3-4379-819b-341951abefff-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350691,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":122},\"maxValues\":{\"id\":122},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-4828722c-5799-4be1-ace1-14bd7f477dbf-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350691,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":123},\"maxValues\":{\"id\":123},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-f3c3c72e-5d71-4dc9-9e15-342f1d6cb6cc-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350701,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":124},\"maxValues\":{\"id\":124},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-e8d74ede-8876-4f55-8e9f-1bbde0d07a35-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350696,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":125},\"maxValues\":{\"id\":125},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-91052146-2292-45c3-b57e-1fd2dd6be6ed-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350692,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":126},\"maxValues\":{\"id\":126},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-9d203964-9f4c-4c84-ad77-9ba305bb6572-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350706,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":127},\"maxValues\":{\"id\":127},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-8362228b-acf6-4937-875b-26c013c342e1-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350690,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":128},\"maxValues\":{\"id\":128},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-6f57658e-1953-4b59-b504-27c9e8c5cc3b-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350677,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":129},\"maxValues\":{\"id\":129},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_c/_delta_log/_last_checkpoint b/crates/core/tests/data_err_logs/table_c/_delta_log/_last_checkpoint new file mode 100644 index 0000000000..604c2b31a2 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_c/_delta_log/_last_checkpoint @@ -0,0 +1 @@ +{"version":10,"size":102,"sizeInBytes":21615,"numOfAddFiles":100,"checkpointSchema":{"type":"struct","fields":[{"name":"txn","type":{"type":"struct","fields":[{"name":"appId","type":"string","nullable":true,"metadata":{}},{"name":"version","type":"long","nullable":true,"metadata":{}},{"name":"lastUpdated","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"add","type":{"type":"struct","fields":[{"name":"path","type":"string","nullable":true,"metadata":{}},{"name":"partitionValues","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"size","type":"long","nullable":true,"metadata":{}},{"name":"modificationTime","type":"long","nullable":true,"metadata":{}},{"name":"dataChange","type":"boolean","nullable":true,"metadata":{}},{"name":"tags","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"deletionVector","type":{"type":"struct","fields":[{"name":"storageType","type":"string","nullable":true,"metadata":{}},{"name":"pathOrInlineDv","type":"string","nullable":true,"metadata":{}},{"name":"offset","type":"integer","nullable":true,"metadata":{}},{"name":"sizeInBytes","type":"integer","nullable":true,"metadata":{}},{"name":"cardinality","type":"long","nullable":true,"metadata":{}},{"name":"maxRowIndex","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"baseRowId","type":"long","nullable":true,"metadata":{}},{"name":"defaultRowCommitVersion","type":"long","nullable":true,"metadata":{}},{"name":"clusteringProvider","type":"string","nullable":true,"metadata":{}},{"name":"stats","type":"string","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"remove","type":{"type":"struct","fields":[{"name":"path","type":"string","nullable":true,"metadata":{}},{"name":"deletionTimestamp","type":"long","nullable":true,"metadata":{}},{"name":"dataChange","type":"boolean","nullable":true,"metadata":{}},{"name":"extendedFileMetadata","type":"boolean","nullable":true,"metadata":{}},{"name":"partitionValues","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"size","type":"long","nullable":true,"metadata":{}},{"name":"deletionVector","type":{"type":"struct","fields":[{"name":"storageType","type":"string","nullable":true,"metadata":{}},{"name":"pathOrInlineDv","type":"string","nullable":true,"metadata":{}},{"name":"offset","type":"integer","nullable":true,"metadata":{}},{"name":"sizeInBytes","type":"integer","nullable":true,"metadata":{}},{"name":"cardinality","type":"long","nullable":true,"metadata":{}},{"name":"maxRowIndex","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"baseRowId","type":"long","nullable":true,"metadata":{}},{"name":"defaultRowCommitVersion","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"metaData","type":{"type":"struct","fields":[{"name":"id","type":"string","nullable":true,"metadata":{}},{"name":"name","type":"string","nullable":true,"metadata":{}},{"name":"description","type":"string","nullable":true,"metadata":{}},{"name":"format","type":{"type":"struct","fields":[{"name":"provider","type":"string","nullable":true,"metadata":{}},{"name":"options","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"schemaString","type":"string","nullable":true,"metadata":{}},{"name":"partitionColumns","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}},{"name":"configuration","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"createdTime","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"protocol","type":{"type":"struct","fields":[{"name":"minReaderVersion","type":"integer","nullable":true,"metadata":{}},{"name":"minWriterVersion","type":"integer","nullable":true,"metadata":{}},{"name":"readerFeatures","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}},{"name":"writerFeatures","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"domainMetadata","type":{"type":"struct","fields":[{"name":"domain","type":"string","nullable":true,"metadata":{}},{"name":"configuration","type":"string","nullable":true,"metadata":{}},{"name":"removed","type":"boolean","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"checksum":"94a578f92841fa7ba9cdee96b5905fdb"} diff --git a/crates/core/tests/data_err_logs/table_d/_delta_log/00000000000000000000.checkpoint.parquet b/crates/core/tests/data_err_logs/table_d/_delta_log/00000000000000000000.checkpoint.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9fd1755fd595e3c7143926272b940120fd45856d GIT binary patch literal 10569 zcmeHNeQXrR72jJQ=dZ)q!L#gvaj6roj$}JF*w|R7s&P z?(E*&oqgb0tX z_OgmpksrJ?|8j*3SwfMELRXX3otRo&33g~M9Z3fk|#``bYl*+4>*g~tx*!fzW$ z13CBFhX<>26etA+`jynT1~da?;2e(peD2>Yqh+Th5hWEf=1mWQ-#?taJ(QDGJx_zg z7aELx{aQnJL)u6UM2v=}hN!N^)O17l*I#HzC~;TQ}6 zq}J3B*9=8f45gv_g#!n^alq1V_=CaUudr>+wt;%9p45|n{Osdjm)K)$Uy8!X@RyJO z$RnVoPr=W%?_B&3vmu*F6jp;NsunYpmZ+u}11YT~0;RRnma5tQzS+H^3pql%j-xPQ22N=jD}MjD@#{(dc? z?nuP;0l|W)>@HshYCHj1fL|g8B(L!4O19Oo!301*Xlhj zvgTk38*s*H-f)_eUPdm90OBBm;th>V{Nn0A_@%h}k=%h}lRDmHf3X^yR6y_cNk(Q4K^#SkMkZUIAIm&<%h-rNjobG!cooy$NVLymI-o+P-h@#ctv?!6ndR zi}iz`W%RAxgNk_5`nNSxr|Ni8)uTErPGEmhk3+WrBP#Hl?Xf*dqEAEJsvwhDNxO~mwswq>vVN(5zm$!IjK8Ss?=<0?^)t%;s} zhL$$fsf;KoRZl1}-PnhMy@t}aL~}*CJglU(UCHORs;1{xspkAa*JH_eTsNMw=Y_f} zZ74?C(iv3nrL=f*uSTTYsiE^&WBUfZ2iSOjt_#7ZqFk#zZ@@R0rarF6v@coT#c1UJ zc6z~S=Je(>W~u)C#A1QEy6A}nHOuhiu?3=-RGSkg>TEHhXS{ffu_F^ZDpf5VN$LF< z=JxChvM-uU#TCO|TV?&J_UVeo~Kj3i@Fk)zAGI<6;nX$p9UEht>&*!|U%ZaCc-MW7c9eB3;j6>StE zpQBuNgH)4oMNimE&rZ}nv)gtXx-#2M_8Ux$rDirz1<4zz3*6ciEUP2_Ku4KR2sC2$ zH?uy0*HX9C9V`oNg0oN^oVmb7f*>7iBel{r9B+kS4;&#~Kznmw&>jvtg3ymBbbmI~ zy^~OQND2k_bMO-gAN>qUJ_V;&rZen1bhlx0PkA`xnKmLxCn@q8MXm>9drJVwCoX3% z!aayG_Da3MYdqrxWPF!0o)%G;A{fmD!#GcioR)LQd5v-oV%flUE!v0qkl8m_!Xa-X z^2j>$%oz@uvpX`DGd#=424I<>EH{8faLIRVrVM|B$B!fa9K}z1+5B{&o4MWBz3Wmw zobE|4XVqEc`+)MzFU-g7){JtVr<_8{?|l{>bpmQGqG@726|G&KQJq-EX*Z8l$0^m! z!c^{W1jaYR^9^nUzBede#>ZwqpK9(e491u7ar%uS-}{tr8u(lhN_Wr#fj-UCOd!p{ z2K3l5DEJhnaYgqwX&VgY^mzk`UZq4=uyW*zE@x9@=3L?7?;-pQg^xqRa>EOK3^B5C zK8IaJviB(2tULQbq%Nc1P7FHBLl3nB^d^OlL7mi2Le?i%b{-ll=X~%Af`9xVI&&I} ztdKt=6nQ`Om_8SHvP(#I zm68orvM{mKDKg0pRdO0$N7A8(&|On^K*~kp6i<7&189FqX)|~`mOTqjs>P&~@pC$5 z5d0?!o(3>C|6KG;^XQX^o}=hdD1}<9$qpVoDHK?jqsuux&LPvG_2`?+i!pHyxy*CD zk6gzn*ErPgEt|S5wTj8z<5iq`H<9vnN;$hIWuf#v%d-w`0@llvbqvbWcKXh*D4)T{ zR&Y8VL$=$L?b4!bd6W1hp7JzOz7oV)IaZ87FC8Vj?MjHJKRZRY2o8C4oeqvu63GYr#Q;AQ`#9kzcR#X#jF9|vg^mJ&u#M*O1s@ix`kk- z7IntY*st-z?DJuP+?1*1Y?w#ZQxBsj=I(^mm8bBddyeNF>;m2?$~y^{@Hv5(b^fG| zfpXhTPjHfi4o^Z+!p(7nb&??A-`ZbTMBy3Lrz$+*%)4ndAP3x|5^1B5xP zkR7g-8J5BT4Y@8!Tg2hbAmU^TX%T`mg+yf4bC`%36oL0X=D#Syd2)-$pokeN;`kcJk#<3e zD-Pwx^d4zX@c5cg_N7wx69~_`a>{<|R^}usGyDj7LI@7km_fn4F1%@1zeLH)RQgd zrR1~p+5{fxB}%h(L)&b+S*|y4Rhq3kmUQ#RmiCs`=G{*`x^s8$jwc1V#vIrJ7Z+Nr z1*^F7gOs@0ByP@6EXdnzN%7P=>n3JhTT9!9Su2aM9ZCxANS~sg5;m(ci uTBKw1##Xhfvo)eBTHB^Jt-ZBNRl~}|Q22veo%{S(n1KKPvJ?Ij<$nRQB4*(L literal 0 HcmV?d00001 diff --git a/crates/core/tests/data_err_logs/table_d/_delta_log/00000000000000000000.json b/crates/core/tests/data_err_logs/table_d/_delta_log/00000000000000000000.json new file mode 100644 index 0000000000..d78fe64124 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_d/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1742252106605,"engineInfo":"Kernel-3.4.0-SNAPSHOT/test-engine","operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]"},"isBlindAppend":true,"txnId":"138db460-c7d3-4d13-972e-0f4e35b58b43","operationMetrics":{}}} +{"metaData":{"id":"ebffcf7b-bf25-427f-a91a-d4a0d27ebbef","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"createdTime":1742252106422,"configuration":{"delta.feature.catalogowned":"supported"}}} +{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["catalogOwned"],"writerFeatures":["catalogOwned","invariants","appendOnly"]}} diff --git a/crates/core/tests/data_err_logs/table_d/_delta_log/_last_checkpoint b/crates/core/tests/data_err_logs/table_d/_delta_log/_last_checkpoint new file mode 100644 index 0000000000..f55c81779f --- /dev/null +++ b/crates/core/tests/data_err_logs/table_d/_delta_log/_last_checkpoint @@ -0,0 +1 @@ +{"version":0,"size":0} diff --git a/crates/core/tests/data_err_logs/table_e/_delta_log/00000000000000000000.checkpoint.parquet b/crates/core/tests/data_err_logs/table_e/_delta_log/00000000000000000000.checkpoint.parquet new file mode 100644 index 0000000000000000000000000000000000000000..4d9437c57f0dbf93d505b3eeb380285c99f834f9 GIT binary patch literal 11644 zcmdT~eQaCR6@PYI=gUn~n)_;3ZV7>!u+_n7zB2CBd(OG{p6lKB@2Mvealq%*@jv`vg`3#n(N0Pv$tp=wk#xP6 z6{LcE|A%uwDl>5wQBGvl;qzkez;{=g-cnqf4``H@hNghl(&kkaP4PDR0?ppG#!jEN zp}s|FX;FL)YG}-*;rJ(xU-?y07Lf%Y@a?M?7JhAhn^yGGdRwT0 zu8zHNp_q+}8`=ieDcZsZf1|@YV0ra<7EQ+sy4=;De)vvl77ev}$}jV4nE&l(LW%q;ep+uICy$eN z|K9(q*&S)FvfKf7Z)t054c0d|d4mC8GvumP)f)&XKCjjkY}A?>HV2f=m`G9i7p8uB zgij#zX$xkQ{BLVW4LLXd@!<+K`78-rFehc^z+3B|b(2P(RqM)wI_n^mtEDuh~@78PdWSj9-J;R3i8af^bufK^BVYT9+1w4vah{ z5C#I^)F47NT~QTXfv`Pv=<67Q=!bo?M}E7CjWu%&)LI+J26FDt$6w(oh}qIARUGFD zcpuqEE`R63zxfz4k*v}>7>cTeb)~LTQ}mv=Ru=?S!m+N{lTl3tT^ivt_m`I@xEv9o z6c7?U-QBUcP8q656*>RzKVQyT7_1n{fQSUo{b!(;kB-^ZiZclW?eIDL#>dwd5$TW| zKxe4A%TO~;;^duwoc=GYrDXtNCrnKT%^wR{JY|5Pba!h}byqZe5T8QP14=xkKt=?B zys7L4U%FOXTuuqT#Ojjm()Av#E~js)n}8c&L1k=(C{w~#f0vUt88hfw!q$CL)AZT` zW4BmX$C3Y#(U6xMVE@zo3Zx@uxGM&&ZmymWDwQIS z1|6JY+QEB|t>nFvE4hsR3NB-&f)fm`;?3z*oOZa9)6P|Lg3;BynM^l_ZsENb)6M=h zymu>oc~NH!rUxpD>777w;ZlM|K)~*Vd6|8O0Gd z2^if;H|a2%BkOtdgY{&;(L1?;3^BfsZspb-tLDw5(F9jr*t2cT16rrj6V|`1#iLqy zZ*adBQHVoovO0_Or=qYyA&!lba|QVFc6Fn)(+xgIY|vADK#M0ru_!)So#kOAp?|d- zHi()^c@}94ZL&E_pU@6|;{YBHi7mxk z1U*cw0|c$0Z&?p2VoQX+rKNPLjKtJXX9&U*&>s#(pj!fpGTgUr-LFKuG}NsE5{gFU zYe790CuJ!~xQbfqh?yy04%r2!?O?TJ0PT}@ejt(uxY=-L{KL_+#L zHZRm%30=_>hR&dZJ+4J!2Q(sQPYs>N)yy_%J>ZRWXPXdgYIRm=PwB8L)zk+=VeKo% z*#HOm|D9eiT55W84Q9Fa{M3Abn!4bL1T{P9zbvoAUAow0aC(OGPjbjM={LaG*L6jo<(jIu1rOCdXC1SRU{NYF&GSC5CH zT?}4wR`S^28VmPCqA16{)1L}{TR4=^2^@V9pcrB_qD6H!D4Jw~aX9^HDvfN&BrAil zXlJOaCr;yiDS;)Zj1~r4=&@id3}sUq&XrK8+M_989Uf2^<=Fj5Jfx?)(ThMYAbc=& zHZ6Ei$oVA2x)Dw_7EwY`7J4jE+f&`lY-q|%GqE?A7>iTcK$T>>%k5fd_LR7Z!_`zG z*VX5opa6>`O_!%bPEj2&++MAvmwr+guXk- zy^?3#mMI}?LqMhNkaqwjNMp5tbPJ_r_nM_$L}`;$+5$+EnAgL2O#2Kf2R(NI{R#VHgvDdx=y$DUXr*!uZ4z&o`O=kH2>JYiGAA+Ly# zg9!OHg*;_Qen;{N$$}VX48bZxcsJZ-5=B*URn-lT=hc%u|A33w*&ryGq$f+#7%%p=TM3Uh+yQy!KAj45)^BTtCw^~H7o zN>ZSC05V5Tdhfu8GcU3aBm0p%(LCc&$~LnvwrAvEDn*(GB$gfxBr!vxGSebN-~#t@31CK08|oCePELxTrxA3Dg7(8rPobbf!1q@Ox=tbN zWePh3u)-{)S4&)1W<=&$WbXYUokF;?U=eDJ)D)!1J-A8`eb^7&Cn@(d<)%r?U>5RL z<{m;sI*v%^DAI7HXd3}ZFvxJFpw$V)nx$BC6f3p(7-|)=M9ztjzeC99H=&tES93G9 z=7wBsl^k6yD0%@=M<{9%P}wS(8&zB=lOozBL_0&#hHkkYTF$j{=oUf2XYT>X847t3 zAPv`AWY{^E%Zno9074%2q9yy+aL8I?%sE!5{xyP9rx0O~B1~l?Tz8S067kL=-dhx} zw~EhgEk8WNp_wKMDBc~lczUb+g%$D(qk;vbplgA`(9Eg*;ysYsr@iDPZ1e}<#|133 z`MBumpQpe9BAEFKn&tVsaiY%WVmH?cILdhu<>A=VQygn_2XV{w6}h^nEV% zHoMr&y%1J!`sj{n7OEP?D$XY@t5!wpQ>-pUXAzxY)+IQ@Rdj|6o6#A@*QN6!$1OKi zu_($;c_&c9ag^PIrCVU<9KGgI^xg;3<~QTv_ZmnO2a#F($q^@T$MCELSpE6#+48*u z74zKC<>NFDW8^-VjMo~-_aMg|aTh)gQ)C9@0I>Q^7ks8BXqmqFGxeKwIE|)!vypVb z4jNQ27V%nM8O*8((%I{fJAmAux;sq$55nk8V(Q5(VhTkpG?F^WGqQdW5gFA*E@EUo zh`@Wu)c+_V{bUv~iz3FFP{aoyf;j{$ZwjT#Y@gmQ?*~SI<|Sw569~_^0?mADR_5>) zQ077tc}Vh1ZpfibX7y84rc8#LtRV}q`kQUFrW;hJ>1x$1?)Vn)qLF6upyc`JRuq@Y z!xfpOQlmslO^XMRk8Twr_#|pQ*NhQ7R=p@pW@vG=v1$SB45A%vL9`^GvEz)O6f4DY zoTP{|jX0x-BOkv_wtGt8PZeAUZ*qD6p2?G_kz{+ zb@dH(_0_e3o={k=Rs)Uotqt|QHnl0x($J^{o7x(DYHN!x2tORIZ?4zy=Xh#BxeLnB SaL4!=|4RP#lC&Fscl|#;+nrYc literal 0 HcmV?d00001 diff --git a/crates/core/tests/data_err_logs/table_e/_delta_log/00000000000000000000.json b/crates/core/tests/data_err_logs/table_e/_delta_log/00000000000000000000.json new file mode 100644 index 0000000000..37d128c02c --- /dev/null +++ b/crates/core/tests/data_err_logs/table_e/_delta_log/00000000000000000000.json @@ -0,0 +1,4 @@ +{"commitInfo":{"timestamp":1742317197774,"engineInfo":"Kernel-3.4.0-SNAPSHOT/test-engine","operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]"},"isBlindAppend":true,"txnId":"09e63766-07ed-4b57-a6e0-0bba30aa801e","operationMetrics":{}}} +{"metaData":{"id":"7998c165-cb06-408d-bba0-e5c4e532ba21","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"createdTime":1742317196814,"configuration":{"delta.feature.catalogowned":"supported"}}} +{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["catalogOwned"],"writerFeatures":["catalogOwned","appendOnly","invariants"]}} +{"add":{"path":"a735be79-daea-40b6-94f0-317a77a03df6-000.parquet","partitionValues":{},"size":1918,"modificationTime":1742317197691,"dataChange":true,"stats":"{\"numRecords\":600,\"minValues\":{\"id\":16},\"maxValues\":{\"id\":3251},\"nullCounts\":{\"id\":30}}"}} diff --git a/crates/core/tests/data_err_logs/table_e/_delta_log/_last_checkpoint b/crates/core/tests/data_err_logs/table_e/_delta_log/_last_checkpoint new file mode 100644 index 0000000000..74f27fded1 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_e/_delta_log/_last_checkpoint @@ -0,0 +1 @@ +{"version":0,"size":1} diff --git a/crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000000.json b/crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000000.json new file mode 100644 index 0000000000..25bdb69d88 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000000.json @@ -0,0 +1,4 @@ +{"commitInfo":{"timestamp":1742317796445,"engineInfo":"Kernel-3.4.0-SNAPSHOT/test-engine","operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]"},"isBlindAppend":true,"txnId":"3d3a3463-1ec9-428d-bdc0-8dd8bad69aee","operationMetrics":{}}} +{"metaData":{"id":"f8d0b1b0-efb8-4323-9c35-69d18d2eebc9","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"createdTime":1742317795464,"configuration":{"delta.feature.catalogowned":"supported"}}} +{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["catalogOwned"],"writerFeatures":["catalogOwned","invariants","appendOnly"]}} +{"add":{"path":"67cfa9cb-6933-4e2c-90b3-5b0c5b513e35-000.parquet","partitionValues":{},"size":1918,"modificationTime":1742317796362,"dataChange":true,"stats":"{\"numRecords\":600,\"minValues\":{\"id\":16},\"maxValues\":{\"id\":3251},\"nullCounts\":{\"id\":30}}"}} diff --git a/crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000001.json b/crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000001.json new file mode 100644 index 0000000000..3d66057360 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000001.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1742317797041,"engineInfo":"Kernel-3.4.0-SNAPSHOT/test-engine","operation":"WRITE","operationParameters":{},"isBlindAppend":true,"txnId":"7bce2352-93ed-4f0c-9f63-42b8600d93d9","operationMetrics":{}}} +{"add":{"path":"ea72200d-614f-42d0-873e-86e7b9442afa-000.parquet","partitionValues":{},"size":1918,"modificationTime":1742317797038,"dataChange":true,"stats":"{\"numRecords\":600,\"minValues\":{\"id\":16},\"maxValues\":{\"id\":3251},\"nullCounts\":{\"id\":30}}"}} diff --git a/crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000002.json b/crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000002.json new file mode 100644 index 0000000000..e4603c5c6b --- /dev/null +++ b/crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000002.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1742317797595,"engineInfo":"Kernel-3.4.0-SNAPSHOT/test-engine","operation":"WRITE","operationParameters":{},"isBlindAppend":true,"txnId":"14f8e260-767a-498e-bcfb-3b18eda1edd5","operationMetrics":{}}} +{"add":{"path":"b86a2fe9-f9d5-415b-ae8e-78b9c46ac18c-000.parquet","partitionValues":{},"size":1918,"modificationTime":1742317797587,"dataChange":true,"stats":"{\"numRecords\":600,\"minValues\":{\"id\":16},\"maxValues\":{\"id\":3251},\"nullCounts\":{\"id\":30}}"}} diff --git a/crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000003.json b/crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000003.json new file mode 100644 index 0000000000..43abaf0510 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000003.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1742317798121,"engineInfo":"Kernel-3.4.0-SNAPSHOT/test-engine","operation":"WRITE","operationParameters":{},"isBlindAppend":true,"txnId":"f802c7fd-4be1-43e2-a7bc-33a1737ecb2d","operationMetrics":{}}} +{"add":{"path":"1971092a-aabc-4a36-a792-a4ec7f98084a-000.parquet","partitionValues":{},"size":1918,"modificationTime":1742317798117,"dataChange":true,"stats":"{\"numRecords\":600,\"minValues\":{\"id\":16},\"maxValues\":{\"id\":3251},\"nullCounts\":{\"id\":30}}"}} diff --git a/crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000004.json b/crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000004.json new file mode 100644 index 0000000000..06abe1b599 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000004.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1742317798598,"engineInfo":"Kernel-3.4.0-SNAPSHOT/test-engine","operation":"WRITE","operationParameters":{},"isBlindAppend":true,"txnId":"d460c3db-4f07-43dc-9c93-1f43dce18f41","operationMetrics":{}}} +{"add":{"path":"e18d2d0e-b059-4842-8e9a-773ce0289a71-000.parquet","partitionValues":{},"size":1918,"modificationTime":1742317798593,"dataChange":true,"stats":"{\"numRecords\":600,\"minValues\":{\"id\":16},\"maxValues\":{\"id\":3251},\"nullCounts\":{\"id\":30}}"}} diff --git a/crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000005.json b/crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000005.json new file mode 100644 index 0000000000..05956e13f1 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000005.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1742317799048,"engineInfo":"Kernel-3.4.0-SNAPSHOT/test-engine","operation":"WRITE","operationParameters":{},"isBlindAppend":true,"txnId":"da5d8fc4-b50b-4287-9362-080ed61211b2","operationMetrics":{}}} +{"add":{"path":"a1885a41-cf1f-43b2-9795-013182d51033-000.parquet","partitionValues":{},"size":1918,"modificationTime":1742317799039,"dataChange":true,"stats":"{\"numRecords\":600,\"minValues\":{\"id\":16},\"maxValues\":{\"id\":3251},\"nullCounts\":{\"id\":30}}"}} diff --git a/crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000006.json b/crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000006.json new file mode 100644 index 0000000000..065299eb12 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000006.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1742317799455,"engineInfo":"Kernel-3.4.0-SNAPSHOT/test-engine","operation":"WRITE","operationParameters":{},"isBlindAppend":true,"txnId":"a415edd2-9413-4635-b864-e859053e6902","operationMetrics":{}}} +{"add":{"path":"05ce144c-628e-463c-8a49-b9a3e2a0f03a-000.parquet","partitionValues":{},"size":1918,"modificationTime":1742317799453,"dataChange":true,"stats":"{\"numRecords\":600,\"minValues\":{\"id\":16},\"maxValues\":{\"id\":3251},\"nullCounts\":{\"id\":30}}"}} diff --git a/crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000007.json b/crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000007.json new file mode 100644 index 0000000000..6353238177 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000007.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1742317799859,"engineInfo":"Kernel-3.4.0-SNAPSHOT/test-engine","operation":"WRITE","operationParameters":{},"isBlindAppend":true,"txnId":"67e8727d-3d94-4e38-8c42-f86e21912a5c","operationMetrics":{}}} +{"add":{"path":"94443ade-9c5d-4efc-9bd6-fd5d39bd3925-000.parquet","partitionValues":{},"size":1918,"modificationTime":1742317799854,"dataChange":true,"stats":"{\"numRecords\":600,\"minValues\":{\"id\":16},\"maxValues\":{\"id\":3251},\"nullCounts\":{\"id\":30}}"}} diff --git a/crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000008.json b/crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000008.json new file mode 100644 index 0000000000..f891a3f55d --- /dev/null +++ b/crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000008.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1742317800272,"engineInfo":"Kernel-3.4.0-SNAPSHOT/test-engine","operation":"WRITE","operationParameters":{},"isBlindAppend":true,"txnId":"83cbc057-f514-4cad-a563-47894d21a89a","operationMetrics":{}}} +{"add":{"path":"12a92422-f105-4d85-af99-248e16bdaaa1-000.parquet","partitionValues":{},"size":1918,"modificationTime":1742317800269,"dataChange":true,"stats":"{\"numRecords\":600,\"minValues\":{\"id\":16},\"maxValues\":{\"id\":3251},\"nullCounts\":{\"id\":30}}"}} diff --git a/crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000009.json b/crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000009.json new file mode 100644 index 0000000000..0b9cd01640 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000009.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1742317800696,"engineInfo":"Kernel-3.4.0-SNAPSHOT/test-engine","operation":"WRITE","operationParameters":{},"isBlindAppend":true,"txnId":"ef305401-d8f7-4db7-afff-31ae85d6b78c","operationMetrics":{}}} +{"add":{"path":"a236c3b0-99e6-4ae6-aa49-2f90b1e3cb2c-000.parquet","partitionValues":{},"size":1918,"modificationTime":1742317800681,"dataChange":true,"stats":"{\"numRecords\":600,\"minValues\":{\"id\":16},\"maxValues\":{\"id\":3251},\"nullCounts\":{\"id\":30}}"}} diff --git a/crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000010.checkpoint.parquet b/crates/core/tests/data_err_logs/table_f/_delta_log/00000000000000000010.checkpoint.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9d4d415f06ca0bd6e4e20a45892159d01fe29b9b GIT binary patch literal 12260 zcmdT~4RBP|6@I%}LP#R=cONe?#F)YwH81Xec4Jfsl!78KK(*TG)VKTY;*Oi$ba#WG z(mJU+DdWTrD&u4*t;(bnXBe7hkg+MIicCnI4AWX_)jFAKtuhRy3}vRu(4KSOd-uJ4 zn}khMMrC(+ckesrJLjHz&b{ZJoAt{!c!@wN;q&pQuf0C2hE&7PQc@-ePC*c=g=xL) zo<-pM%)T>yB^F>8055QqxIFOmMSXE!G#r!TN+c==;z3o8H2V};4MfzKFW4Lk`hv2@ z<7w(pGT-Z@Ipk4%{!q*x^~jM34aoro{!^4dME1oao~W1lV^Lqs&g1nd5nsUPljB}b zP!6c!psd6rz!L~lZz!rNisH5NL;`_;Us0(XiGf-H8js15s2Y;vYEbn<%O5G!%@d4K zZy*qpL%uMT10jD*4x?_-h~lR{#S{1V6}ue88x9ARfLD&iy>U6Pyirn1nk5P{=98sDJ#^a4Ndp!{_uA)R^U^hS5 ztvTY8l>m)3$0K1+I8dNlG#pZVaT<~15f$v_4Mt^!!pNG#(MT*1Qes}1JKOwFrP=56 zcvLy$4ZwInO*!1`r*b$%o1`8p!U=?+xzR2pEJ$$pfxo6&rBeg2cHj5jC@#X(aUO^ zFZz8kb0d&C_*{Bp=eaUt6nvhcfeXn}Hc!(VjEfv{;P=2Yf81|4!94)~NPViaeFKf9 zGitWJCFJom*0(28cPq(G3cnwzPpI`R-q3dFRkr5$`h7t!^rt$L$>r(JRF0GTJ=?d# zbpms&E3_z23M&zb{QKyo?yC!o&9{RhNG26A>B}O(iy)a) z#8>O({p5af{CM{-3W8iHaV!Q0jfYj(^k6fj@n{$}51${lD_AQ-5f!%xN~5uez7$+I z^3(m+C5m|)z~Cx}(KFe5H*u3E`p5otmEjkVAh5Lt_;Ib&w`V}Ypnl<_mtNtLc)05F zoW`q^EI1whU1ZV*jNsj;h-QVzyRG(WU z!MO#J+u-x`pa1j^j*=%B2up-Ypw!h}r>pxgd6=9%c?c)37C=e~aG}#kK#wK@zS|4< z%Hc&KwMEG!6j(Izm(tNeQ|h`@avOkLiRi7G>!^|h_!4K8SS?)cr?X1(Of}B~mZ_#i zsEn%^Wy*|frAp#WgBf%!Gg1a<8rg{DwFAcPa2Op^GQ^c52DO(?ck7)#g#0D5%9XSC z*|R& zfk|IlAZGDBph_~^76KP8{#6otSi5n-hi+=K}Ra#ee+fNI3xw%wVN#*7$Zjemw zX8CVx#4^$%qBb>R8*e?zTac{3I!k|lww$cdn}Zd+d8CrG>AjC#Mb0_&_PN=-d*Eua zRPQ}+HV5XIy=HS@uGu@6%NUwR*6Ij7^Ett&**rtY3Z0-|n+ zZOm-;RhhJAv$xu$HJg3cn6zfIw}$t=Z8m!r@!nyx*;UJX2Wv@>VZV`U`8apf@#aV! z*`zDH=Q?hlQ)cr}J-KK=^e^TVmll)tI^XCL(rr+jbd$K=J$OAIURMKe4mXgbPB3r- z*=O**esWR+1xifAZ4T~<5g6ZEYNse9ZK#2oi!*( zSHn7%OXPr6J0e1j!MUapV1q7c)`N80cemhq66y|2a0&DnVk<$=4EEOOK}B5I#KTn6 zX-<1uO~exro`Ak7(GJ}LP?TdPyZiwqwV9%BvtdD@Og@d}(iu{&Nvg=g4`nmm1SzzE zk3*l0>aWYJP9+m5x?+p*$_dJprsMG}%|WIFbQP$_>Qvjd9L;L#RK%2wnn)?hL~a`j zcIA}K+35|}Db0#1S-K(p&}vn){6bZmKj>PXZf{TI?l$IynJb%9a#>wxP@yzK+tXVp z5euh=&Es6dHfTNIZSN>FA=uO@EudR-kcU!rWgWhUc*r;Kc;Wc<7YQ1P?~Ej0+EUhG$|&g-WxrOritb+?ahqDvhTz?Mlvw zt+I|xdP_p38H2(p%}g_vDS0WZ4tYU|`c-DxLb5TJNu)L#@Pagl$Nutkva>yfa!S{< zYQgVHCbBsK2Vw-62{GDEQ#oT$Y>|m&;3P;@7TK_pTop^F;)%_j85ZwT2`rn+XkoBL zE*(oJq1eyDc@9*zHc%*dXLvwilwKJbi0yB z8KGw^)TLUtVKy{nzL|_SxG>JtRs&TKtE+0NE{5DtRjjNEmIMit3H9(VvE?~ zE^7_Jd9Vh~onRt3u=cl+1>%lIQY*OIYKY6iXlxFgwavzQ3|aRv)<^SMtvd;4f7HhQ zF0vnE?1!Kr!F7gRhwV0u5fY+TXSokKT0^deDW5l|eyFut=1d!r#E%-u2M)n~77qNC zRZGC8t4csW`}$~ZdAQXv#n*^yfXg=ICzlgZe7y;cJy=mBq867NbFjjOxf?OZ80HbE z#NZ6NaO%;Fxa=~{t>RYqkxF-WWsP~DHd~^rfgiKYUZJWHZU3I=aRdFP-)BpBWa@+|oP zlhh`*0Zl6${NW48>}kasL)|~SnbZsJ0jLG*?iNNn*^&us62Y*Sic5jVX7>w-x7&kr zd46&{Tk^pn&)XrNS^~EmJ!#=k19NQJy@*inXxdEo$XNF}#l21-aQ7^zk?xspX=#F6oht?m5&D(Wd7K4t5T|OtkL+;@?tYQ4 zDh+yyZti2a_4bQ4ulYT44+n6fPXn{Doa)SlX*`>EL^^Fp`UH_aW=MSt&4p4&vagj0 z(YMg1%q4_)Jc!DS6+(=ARN(rI*}5(YpB?omqV8;F=BqLH%kdSpZkdSITVvDlT|_&`(B8f*TEVV~kZ;=|&m!a~ zgX~$vU27ssS}F}ZvU(QTw0v?EK%Qoh!zQG$;Mn#mRBG6cup1F}gwbkUwOlC!Vbbji z@w#el8ucUIK8804ct!~D<1QR?*>IrT*5hyw*0vT`dM}Pjlp2aEDPLGF1h zOb$T$KXUCj<6`G^HZsf)N3OM*Wem-7K7wZ6Q8x~|^>oiscGTHWE+ERz7DO2_QNrA~ zdNqJo0T|LIaT8Ft^4kX90BadH5WLSi=BHMJw1X|AMR4!A4t2&8&$sNtjQYX^N!p(4 zY-4y4QBSckoSF)%Wu3y))G0geVZ`me1#NSvejE$u)i0P~pj>;=0mQA+z0_6}5>knc?C5*HhY)^4PM`>-S%n!gD`gYr~R^u7(x-B+)kQc$7`5C zL|#3Ri|A?q5qNW`{f{EdUse$(QN*sLC}J2y=;v8J2wQQn(5I7P5*SeR-U3KpW5BWoERPQ?NOu^L z+B0?fOXcs&FL5b`75Z6szH)~Rs^*WB%OX8GzecHfw-NyBYj6pooH)6&Hv<{p$ON=68rMrSQ2) zY!-w$`36A{UP=i+lL1qsVriLp{0A?WQk6tru+9`fOL!!eZa8>2pI(Xf7#%N4vMejI z6$9Zxb1&s0pGFh|C){f0z+svEMa zOTMYw5>`PZqF7WiJ=yVmSNEvtOWSq}`z?u3L)CQKl{_09fR-&ex~)lyrW4a~45kuY z+H!-iob2#zm%64c*_!G~y5{&2VY(yvo^05jq1dkMPrj476RTyGVi`Ud^L=1CCY5a6 zOx@B+spaaLt;mz0E$fo3II2l3pcxDc!crwi@l1)xOg0_EF+4@#Lv@Mykc3QJNpmb8 z64O1&aWzx&D76(=wsee^wsfQx9qv{PT{c}`f{d2m-)A|Jqsb5^MomvLRFGOQMbnBT;2vavab%Waik0r(%t4(oN}xL~NJ25WD0PebM25%%v7FCBw57 zO>-Qk6MHB0?dVihHONEp2uOn~mZ%Xhbum(Wgc^}mMKWX z)g04dmSkHnUftK>X*Qgg`BYOK%TPSKM4RrkB-8R0hsY+rF)szB$>E;-SHF;q0;@`MRGiG}D_2)n$qJSia*CMN=nf z(|-7Xi6Ce@4q!AyJ>BNMuM=NVJyUm?Ll7ysPfuZ)h)uo))3@Q82*wQDY7Q)d5=GZN zo!aK)+J^nGFdNeLR2VkQ3G;Nrlc09MW^BTAS=_G|`8Q9rwLZa=kn+(G1aWKGwItNNaWn3uYt z8=+p;U^v9Kvwt|Z9d<+u-ACWvayW$$uNp*F7;=c>`k<;oehxB=M2JnGEm*7>g!pzSMKj4MuN{_XT3UAQ zTzXpfeb_IxUCS^u+bngYIyA{f%G2Caf8LCQb+mgGG-M$oDjrnkFb@kyY#^YgTe4}w zhkVVKv-6r)K|Csf`*?=oz@3mz_FTy9P%WbPDzcM{h^VWGtwg2>`B(%j0kPB6CJ(28 zCmW`zx=3p#tT8*%vJ9aXQA$GagMsKCad#e1uSJkz)Ic_dK3&y>4H^(i>eX&&)pZpc z$-$a@yQvrUVpGc@@NFJ>X||;`qf3hx^hlO3dkWGNb^X%e)RZn+Ob1`6Fc=SwpadE) zT?vWBVy2-|nZaJ*tP*myNo=e`@eOd8+TCk7Oj1-$V-8%^B6J%v07(0arueQ!Bv++t zn;G#=J&1&;sw$Rj*_K5ncQs}^uFCt4P8^dRJ=B4i=b6x2_UqYh*HC@c_ASM+cegg= zb%p7&EoXPM-VGnsv0_9T4S4_}K?*liMb!QfyY6-)=Kdc2eiIT$`CfJSR{wLr??;LW8Tq z3>nf3Wds>U^(;zDZYpm{K2;QF`Clw!E($c=P+&$5j9hiHpSfzjZ|ijDWAmmE3|(7w zbjN|mA(bMPBY;q(X)F;HiUphUO4nw35bibGR}};dxABIIYh;JAI&1{%@KHD+L7K=X z%V2Q|Ok750q==*luER?l8|yNW36$(tnlrjq`fj66Bm^N#bz$xBx1(+C7F2A>5AEuN z$rw7qo`clQZAMXK*Hlf0h^A^72BHc| zHUxbSxk{y!nD8b`Rwi2;xV@+<>Hr@xRW-d+W*cD;-qFn+9z&8v1$A`mmiyD)usKim znB&WKc4+?5kM)^II3}teSf8Qz4%I9M8KXS0pnJqlOVX4F{-WR2d?a{JmSh)}LVXvO zIXRXYKwOhGq#Tq92#^RsI`UNZyH_J`$iAMPx~dJOg=xZKvcJ}{jM~JpVEgF7NTlZM z;Jgm_prsqWmEE^M@JiQYS7{QePn3eNI*ow=U0slF*KLGi%l144MYiuuem%P%86GvM z;^~SfdB{wts0c4ZPh zDU|vy?8H_zgaRK%WB5qTDvZmZh%yfI6-zE1Pj`SCGKNC3UuqHb?C&qgzBISjhJ#6t zZNs`yli8&7t@+)se+_ZQheT6)cFlrSNK#k?(!Gh)jY3TGO8;eU8;S$VK-!#-z`z!{ z&AO?dIwqqcM%Bfz?Lb0CuH)WrBM({{GPmYIeW=6Wnn*kblToc0j_Z`*oyt+FYbrw~ zLWD$_sash&hav*?qDfI^BCvWc5;FZt)>_c+BL_1B3hHxQEWp&@RGQ_RvWm>R^Lz`W zVo*)CY@eVYbzu4q>L02g*J{XYn#>GaRrl-`c0jDy9B^BNXmym*FjfR6)rE##SAz^4 z#MM01ui2KL?Vr2+cpr)}lzvd3ixSmzZQJ*=?d_B1oNnyI9CXQ&?__@{W)v-T12!O% zFHCc7xV-S!lf*^;ay)4^DWuWq72q%tR`U(Os z_6zW;?5%A&ycx+Abu7Y~!i$9tc(BYgPcI>oH;J9FF^|D^y+v-1i+zz}qR7=P^VG*P z$Q7Hj+emuKl05i;i@k!RA%vhruryR!2pv9ZHV4InL3!zRH{{?ss9j|gSt>!)(g{)> ziq>pyL8n6v#WW0sAzLH+Dv$tnw2+1CFM0<7@2J=VIp`X)jt5omypZXESWycYjx0;+ z1OGIDG7a(1gpWG1>$r$fs9sCQ<_Ox+IYo+k!9#+hFj17oD2PzRsVG30jUp~9e+03p z9ccsm!lQ@4DKZjv0$4A$jVh{U$Ay+18&wQ<4X^r8bD^k^N(LyUr9LC z*p})R8kaZs3Js}t0bo;2;x`+_CtrW?$Bn(B!?AZXESdPv`r$7ESbx5yIrHEDrlnbM zTbncOPqegUPJbI+cfEiv(kf>@e-K?wFX5BZN-}?UPfMGiwRQ_jTbqU7XdMtff}i)d zZWs2o_MtDIx$2pgWx`17m@tk(U&R0OTRVjf_@Zm7MHb#@J(T(1e-4Q6x9$)=g(=%w zW#Jn9KNs!$TE8LuE!u6Zd10WH2zy#L3Eyk&%>1`+x7;nf2|DWmcMhFRt;>Y>THA!} zXwPfS_ul(LxEAyI@FxWIjgdrI2Qz@#ZUs={Ue--WMd$_0n3*@Ty z8^5!v<%_59_~^dD|9WiUUHdxsKe+WD=J?CIuXFGC{EES)3zW_SZ3hm0>CwK&9{ZOs z?RfCyQ@{V-)c^RWbl$2x;dpIGtFm3qDfI{v-IBO8Vb%pD!~CKg|(^ei48 z9J!qiZD9EHdy5CX#n;(nD|+ds+po_FOGY*f4XqsAFj8FH+S4p=-Ma5Kq1jNkuD(rZ z%)N1&kTK0Qy!qyBLg8@KrD5!))tJK= z;pRP$Hx$+iDRIrM!j~UyD4a(>r}St;G|4VJo=OFLXiCh-T8!f0c9}THL~ICJ;X?GS zQ8*W(2zt3MV_9o%4fz2Rk7UX)Uu_IqW(7={h+3WunD;{gfN+_JQYeoj@wa*)C@kZs zz2bk(4HmlZp*e-Vxxq5g6s*FJK^r5oM)QiFMxaWe5OGclH#LO}CF=^;%4PEtEP(&y z5L^mMz%=i%KLOg+B?kfF@*E7L8t5Qx;9? zhbwN0L}$lKwE}K97iL*!&_y38an#4sx1Y^qw)VVvt5CS90jqyZ`0|g_h1G!M><&cB zH5c_>|KoI^WlUZJ=K17vslq#RLWw<|0{lbKqKv}9XyGIf7)N?9n)7(7a4sFsQ5L&QW|IwjMQ(g zcpS6%H;07V`!k>X(wr|Jf*JF^Ljqh%&{KN@P4gi?jl^*)A1#pVWb=wckr8NxvN>1~ zc-hPBLVv`mQuxZ8YVPc^V^+9v`7jUo? zz-BvWDN3H_PJZog=T$;|^o}xL4S<>hzM|_cpz;c4`@j6%v1CGZ1;bf@B?UaxH~K_M zd^shSo_%{kB{zdVD09Q5^xHvNE^Z!Ui?3TeQ5@gk78my{_6OMzPtm`Fj7^96S4hr1 zi(7x~;NVD+tz+Z(z_T-ThVUh3F=TC4#8G;YB9(95de_!yZ9n+V$vw&SWOecXE{X&xDjd*4l(&)gIDo=uw1-W&IxzgIj^7U2G2adpJR>7@C@i0DMUyGLVL zzL_+SkBQsU(TL;Y@rZ{fVjY~Hh3`~314-T=LqAdYe z$A@DNzpzRIvZhe2LU%i6%Qo?Th zxj#&a?YZx77CVKm)g5A{Vs1GdB(qmn&weGC{a!G87cOQhr&s2W1Y%dJ8si^dahPgLt*^7CA^N49a*?(Dx5QnfyJ_E%%=53o)ISZ@SaK@1>VpH-DjWknON zxi8n>^+pZklL6$90>~pb#eO*xWK}VgpdPuYn$cqc>Yn@gqAvh79V%lc)ToY1&@R-V zy%vfvKznh`CDAGiu>|tPHPsBB2_+do?z=e-X)`f6ucDGmAotx|4O!a575$?C^7LgO zD@wrx@^lU4fdF#P_7s=nlLIlEfdePiV*>HyKs8rK0*E6a#JMVnixr#+;JF&W7XrW& zA>i>_Vv7lQnxRHbu#VqS&D`IIA`7wh6sBV}1_f<6efMWmxieRH?J4lQaP#F4w=Q`y z6<$C`*VDL!&d&ny(x+v?jdlW98A#s667#t|52S*GG3h01j0*IMjK z_0+!!jNljm%IP-(s4e6}nQ^pZHE0h9Xs?85C6>g=2(2bma+ngUX6hRO%$X48g<6;y zHw_72FVq0O8~{$?*X@s|NP50CwuLVc1xIIoKKF zI!AtTeKo_+1;|f@$Y+3D4(u~CJT10!#4|OBCj!J*Lc}A3mqVknA(}To$$_&U4sj?y2T>?-v6X4`I(Hu-%bZt3y3U zIa`B*+b&qsD_7BgnwqU;=uI~EWYK*BC&;Ja> z^Zbs$9!}2$QMtoUkWSYieP$n5-VZ{gCq`msNG#~4@Khi^4G7Ubm2^73uR?$K(gJQW~62V}(ZH|s&J zjJX{9%^K|E0ru-5_HjIZtTD)GXBwf9K9kOEPj?+3>&^f6mELh&^mhD1gyyRJg=Dnp3CXq^W$u6l#(i#qsfKgL^kHiUYV#fx$gIQ42l& zY-J`(=KJ!8C#r#W{#)*q&xOF}>j17cCEiL%gD+ISH%(+=knGNp~PC389;^HxaaB*;VsW4xHE)4q!s)5(q1IzJl#AJ`oj z)6@6O06hIL@FKuZ-&YO1dw*a|A@I~D4jdlgWv8DJMrVVePw+C0M zwd`Et4PwQ$Ydr4*SXbp8?FYvRFLaan)z4clQu-y@sJH~a0+foaXo6CwjV3qE^+D7T zLE^jX`Y0sQy2xes2ZgJ>9Z!(z4dB7aW4+VskX?A67ZZpb_Y$6RWdXSCqVZjSOuNqL z_cMuFGd(Cs7^*u6{{NI+%8L4BEKMHNB&Jb!ME+$g4U67cd3hZHto+U;R?TceSnv-H zB7W+P$icr^D3|bcR|plaY+{b<1__Vf;`y^#arxm+JgIJ?(L0|QAg20rshI1qh{}gV zF}HOuHxA{|yQ3H>UN;>dr<>6oy*sMj8sPEK2i_G-d$?3f`R5|>q&%v4_!RTnBa4$sw(w5|YZ`qQo$C|=NqXgAZ3gMJRA>u}5ICrh=wT|&>{ z)ObqWzC5!#)eRH9eM|V5Wqo*AA>FlA$gf7g;m^dR;qlJs)TUEes%T4<&TePJ;E>np zIXZp=pd#a!FFN>Th77;EQN!;J!>`lxC{s-SJE8CkJ{)>2p5@_*_5b641bwL>4B%Id F{vT}G{2u@S literal 0 HcmV?d00001 diff --git a/crates/core/tests/data_err_logs/table_g/_delta_log/00000000000000000010.json b/crates/core/tests/data_err_logs/table_g/_delta_log/00000000000000000010.json new file mode 100644 index 0000000000..c1f8c8b095 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_g/_delta_log/00000000000000000010.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317342605,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":9,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"ba4b200d-e0ac-4715-ad4e-bed8ef1b20cd"}} +{"add":{"path":"part-00001-55fbea2e-0788-438a-a50c-65f809acc05b-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342581,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":90},\"maxValues\":{\"id\":90},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-0d891eaf-0e01-46a6-879e-49bbca90c215-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317342581,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":91},\"maxValues\":{\"id\":91},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-569d12e9-04d5-4fe2-9554-f288047f3386-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342592,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":92},\"maxValues\":{\"id\":92},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-1bc89ec9-8092-49e1-9b1f-123ae50e3d40-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342579,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":93},\"maxValues\":{\"id\":93},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-b4a223d6-1d87-49c9-84c9-a85eece61839-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342591,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":94},\"maxValues\":{\"id\":94},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-4c3b6be7-979c-4f42-8920-efa32b751d97-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342578,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":95},\"maxValues\":{\"id\":95},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-6c224359-8995-417a-8b24-b2e530327bc6-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342581,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":96},\"maxValues\":{\"id\":96},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-2a40eb21-34d2-48ca-aaa5-55db674f56de-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342590,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":97},\"maxValues\":{\"id\":97},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-ca8256ed-98cd-460d-8de2-9f6f7f388703-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342579,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":98},\"maxValues\":{\"id\":98},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-f131fc78-c201-4e8d-b194-222b2e79778d-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342578,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":99},\"maxValues\":{\"id\":99},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_g/_delta_log/00000000000000000011.json b/crates/core/tests/data_err_logs/table_g/_delta_log/00000000000000000011.json new file mode 100644 index 0000000000..05863f6cd0 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_g/_delta_log/00000000000000000011.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317349152,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":10,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"1657fc30-879d-4b0b-972a-4e3a079fdd7a"}} +{"add":{"path":"part-00001-ceaadd5e-615b-455d-8f4b-052b9c94c7b6-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349136,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":100},\"maxValues\":{\"id\":100},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-b7dba1e7-b1e5-4f02-a223-69ec7353ab45-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349123,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":101},\"maxValues\":{\"id\":101},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-f099cf4d-d418-4852-8580-091908847a66-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317349122,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":102},\"maxValues\":{\"id\":102},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-418e8d25-7316-442a-9bc8-616ed01231eb-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349136,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":103},\"maxValues\":{\"id\":103},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-7538a9c2-1ccb-4150-b162-ef8d826fe30f-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349123,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":104},\"maxValues\":{\"id\":104},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-7619f42c-5bc4-4e77-b037-f36481c8b63c-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349123,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":105},\"maxValues\":{\"id\":105},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-eb49d85f-91cc-4293-9339-a664ee905b0f-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349134,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":106},\"maxValues\":{\"id\":106},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-81e22719-7705-4703-b2dd-c4e2982217a7-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349122,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":107},\"maxValues\":{\"id\":107},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-883858d1-9df6-4b55-a2be-5b8387134617-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349122,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":108},\"maxValues\":{\"id\":108},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-07d91938-ac89-48cc-a657-6067d2d9f67e-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349137,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":109},\"maxValues\":{\"id\":109},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_g/_delta_log/00000000000000000012.json b/crates/core/tests/data_err_logs/table_g/_delta_log/00000000000000000012.json new file mode 100644 index 0000000000..4cc44fa8e8 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_g/_delta_log/00000000000000000012.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317349950,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":11,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"9a035bdd-f892-4449-9c39-401f31fcada6"}} +{"add":{"path":"part-00001-f3b19100-b5b3-4e72-8658-7a937e9ed515-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349924,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":110},\"maxValues\":{\"id\":110},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-54f2324a-e97f-4def-9101-9cc10599ba06-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349919,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":111},\"maxValues\":{\"id\":111},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-3f7ca40a-6497-4208-8a1a-11062456a5a9-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349936,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":112},\"maxValues\":{\"id\":112},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-a14852b2-c743-4a4a-b9c1-0c9472c51699-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349929,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":113},\"maxValues\":{\"id\":113},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-97d06207-5584-43df-afc2-2d1738d79193-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349943,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":114},\"maxValues\":{\"id\":114},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-0d431f03-6dbf-40e7-96fc-b1ebbbe9fc65-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349922,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":115},\"maxValues\":{\"id\":115},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-af0f0232-33c8-4315-821b-8bb1323b7a26-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317349936,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":116},\"maxValues\":{\"id\":116},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-4f744428-d088-497e-afd3-0b374e453e7c-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349936,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":117},\"maxValues\":{\"id\":117},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-694064b8-137e-45cd-b2ea-e28af172a2dc-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349918,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":118},\"maxValues\":{\"id\":118},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-56073753-4c1c-4a68-9b4a-13ef5d1a75fb-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349938,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":119},\"maxValues\":{\"id\":119},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_g/_delta_log/00000000000000000013.json b/crates/core/tests/data_err_logs/table_g/_delta_log/00000000000000000013.json new file mode 100644 index 0000000000..b2d03d3ead --- /dev/null +++ b/crates/core/tests/data_err_logs/table_g/_delta_log/00000000000000000013.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317350712,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":12,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4819"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"c3cd0fa1-9c72-4344-8225-0b787e52d5e0"}} +{"add":{"path":"part-00001-7a0d95f8-e122-4cf6-b89c-389036a9b415-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350696,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":120},\"maxValues\":{\"id\":120},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-f1f035c1-bf0f-485c-950d-c81d0d2aa8a2-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350706,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":121},\"maxValues\":{\"id\":121},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-d7a51e45-70f3-4379-819b-341951abefff-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350691,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":122},\"maxValues\":{\"id\":122},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-4828722c-5799-4be1-ace1-14bd7f477dbf-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350691,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":123},\"maxValues\":{\"id\":123},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-f3c3c72e-5d71-4dc9-9e15-342f1d6cb6cc-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350701,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":124},\"maxValues\":{\"id\":124},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-e8d74ede-8876-4f55-8e9f-1bbde0d07a35-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350696,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":125},\"maxValues\":{\"id\":125},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-91052146-2292-45c3-b57e-1fd2dd6be6ed-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350692,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":126},\"maxValues\":{\"id\":126},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-9d203964-9f4c-4c84-ad77-9ba305bb6572-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350706,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":127},\"maxValues\":{\"id\":127},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-8362228b-acf6-4937-875b-26c013c342e1-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350690,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":128},\"maxValues\":{\"id\":128},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-6f57658e-1953-4b59-b504-27c9e8c5cc3b-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350677,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":129},\"maxValues\":{\"id\":129},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_g/_delta_log/00000000000000009999.checkpoint.parquet b/crates/core/tests/data_err_logs/table_g/_delta_log/00000000000000009999.checkpoint.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9df253f2f762891c53763e62e57576217a8b289a GIT binary patch literal 21615 zcmeHve{@vWedn9eNJxOjGSQn1ss0cYt8$MXeZSv*J~}DCYRg2%@)&#F)J}Zgd-s8C z{i-xzpbN&@WK1Zf3Lz{>Xcs&FL5b`75Z6szH)~Rs^*WB%OX8GzecHfw-NyBYj6pooH)6&Hv<{p$ON=68rMrSQ2) zY!-w$`36A{UP=i+lL1qsVriLp{0A?WQk6tru+9`fOL!!eZa8>2pI(Xf7#%N4vMejI z6$9Zxb1&s0pGFh|C){f0z+svEMa zOTMYw5>`PZqF7WiJ=yVmSNEvtOWSq}`z?u3L)CQKl{_09fR-&ex~)lyrW4a~45kuY z+H!-iob2#zm%64c*_!G~y5{&2VY(yvo^05jq1dkMPrj476RTyGVi`Ud^L=1CCY5a6 zOx@B+spaaLt;mz0E$fo3II2l3pcxDc!crwi@l1)xOg0_EF+4@#Lv@Mykc3QJNpmb8 z64O1&aWzx&D76(=wsee^wsfQx9qv{PT{c}`f{d2m-)A|Jqsb5^MomvLRFGOQMbnBT;2vavab%Waik0r(%t4(oN}xL~NJ25WD0PebM25%%v7FCBw57 zO>-Qk6MHB0?dVihHONEp2uOn~mZ%Xhbum(Wgc^}mMKWX z)g04dmSkHnUftK>X*Qgg`BYOK%TPSKM4RrkB-8R0hsY+rF)szB$>E;-SHF;q0;@`MRGiG}D_2)n$qJSia*CMN=nf z(|-7Xi6Ce@4q!AyJ>BNMuM=NVJyUm?Ll7ysPfuZ)h)uo))3@Q82*wQDY7Q)d5=GZN zo!aK)+J^nGFdNeLR2VkQ3G;Nrlc09MW^BTAS=_G|`8Q9rwLZa=kn+(G1aWKGwItNNaWn3uYt z8=+p;U^v9Kvwt|Z9d<+u-ACWvayW$$uNp*F7;=c>`k<;oehxB=M2JnGEm*7>g!pzSMKj4MuN{_XT3UAQ zTzXpfeb_IxUCS^u+bngYIyA{f%G2Caf8LCQb+mgGG-M$oDjrnkFb@kyY#^YgTe4}w zhkVVKv-6r)K|Csf`*?=oz@3mz_FTy9P%WbPDzcM{h^VWGtwg2>`B(%j0kPB6CJ(28 zCmW`zx=3p#tT8*%vJ9aXQA$GagMsKCad#e1uSJkz)Ic_dK3&y>4H^(i>eX&&)pZpc z$-$a@yQvrUVpGc@@NFJ>X||;`qf3hx^hlO3dkWGNb^X%e)RZn+Ob1`6Fc=SwpadE) zT?vWBVy2-|nZaJ*tP*myNo=e`@eOd8+TCk7Oj1-$V-8%^B6J%v07(0arueQ!Bv++t zn;G#=J&1&;sw$Rj*_K5ncQs}^uFCt4P8^dRJ=B4i=b6x2_UqYh*HC@c_ASM+cegg= zb%p7&EoXPM-VGnsv0_9T4S4_}K?*liMb!QfyY6-)=Kdc2eiIT$`CfJSR{wLr??;LW8Tq z3>nf3Wds>U^(;zDZYpm{K2;QF`Clw!E($c=P+&$5j9hiHpSfzjZ|ijDWAmmE3|(7w zbjN|mA(bMPBY;q(X)F;HiUphUO4nw35bibGR}};dxABIIYh;JAI&1{%@KHD+L7K=X z%V2Q|Ok750q==*luER?l8|yNW36$(tnlrjq`fj66Bm^N#bz$xBx1(+C7F2A>5AEuN z$rw7qo`clQZAMXK*Hlf0h^A^72BHc| zHUxbSxk{y!nD8b`Rwi2;xV@+<>Hr@xRW-d+W*cD;-qFn+9z&8v1$A`mmiyD)usKim znB&WKc4+?5kM)^II3}teSf8Qz4%I9M8KXS0pnJqlOVX4F{-WR2d?a{JmSh)}LVXvO zIXRXYKwOhGq#Tq92#^RsI`UNZyH_J`$iAMPx~dJOg=xZKvcJ}{jM~JpVEgF7NTlZM z;Jgm_prsqWmEE^M@JiQYS7{QePn3eNI*ow=U0slF*KLGi%l144MYiuuem%P%86GvM z;^~SfdB{wts0c4ZPh zDU|vy?8H_zgaRK%WB5qTDvZmZh%yfI6-zE1Pj`SCGKNC3UuqHb?C&qgzBISjhJ#6t zZNs`yli8&7t@+)se+_ZQheT6)cFlrSNK#k?(!Gh)jY3TGO8;eU8;S$VK-!#-z`z!{ z&AO?dIwqqcM%Bfz?Lb0CuH)WrBM({{GPmYIeW=6Wnn*kblToc0j_Z`*oyt+FYbrw~ zLWD$_sash&hav*?qDfI^BCvWc5;FZt)>_c+BL_1B3hHxQEWp&@RGQ_RvWm>R^Lz`W zVo*)CY@eVYbzu4q>L02g*J{XYn#>GaRrl-`c0jDy9B^BNXmym*FjfR6)rE##SAz^4 z#MM01ui2KL?Vr2+cpr)}lzvd3ixSmzZQJ*=?d_B1oNnyI9CXQ&?__@{W)v-T12!O% zFHCc7xV-S!lf*^;ay)4^DWuWq72q%tR`U(Os z_6zW;?5%A&ycx+Abu7Y~!i$9tc(BYgPcI>oH;J9FF^|D^y+v-1i+zz}qR7=P^VG*P z$Q7Hj+emuKl05i;i@k!RA%vhruryR!2pv9ZHV4InL3!zRH{{?ss9j|gSt>!)(g{)> ziq>pyL8n6v#WW0sAzLH+Dv$tnw2+1CFM0<7@2J=VIp`X)jt5omypZXESWycYjx0;+ z1OGIDG7a(1gpWG1>$r$fs9sCQ<_Ox+IYo+k!9#+hFj17oD2PzRsVG30jUp~9e+03p z9ccsm!lQ@4DKZjv0$4A$jVh{U$Ay+18&wQ<4X^r8bD^k^N(LyUr9LC z*p})R8kaZs3Js}t0bo;2;x`+_CtrW?$Bn(B!?AZXESdPv`r$7ESbx5yIrHEDrlnbM zTbncOPqegUPJbI+cfEiv(kf>@e-K?wFX5BZN-}?UPfMGiwRQ_jTbqU7XdMtff}i)d zZWs2o_MtDIx$2pgWx`17m@tk(U&R0OTRVjf_@Zm7MHb#@J(T(1e-4Q6x9$)=g(=%w zW#Jn9KNs!$TE8LuE!u6Zd10WH2zy#L3Eyk&%>1`+x7;nf2|DWmcMhFRt;>Y>THA!} zXwPfS_ul(LxEAyI@FxWIjgdrI2Qz@#ZUs={Ue--WMd$_0n3*@Ty z8^5!v<%_59_~^dD|9WiUUHdxsKe+WD=J?CIuXFGC{EES)3zW_SZ3hm0>CwK&9{ZOs z?RfCyQ@{V-)c^RWbl$2x;dpIGtFm3qDfI{v-IBO8Vb%pD!~CKg|(^ei48 z9J!qiZD9EHdy5CX#n;(nD|+ds+po_FOGY*f4XqsAFj8FH+S4p=-Ma5Kq1jNkuD(rZ z%)N1&kTK0Qy!qyBLg8@KrD5!))tJK= z;pRP$Hx$+iDRIrM!j~UyD4a(>r}St;G|4VJo=OFLXiCh-T8!f0c9}THL~ICJ;X?GS zQ8*W(2zt3MV_9o%4fz2Rk7UX)Uu_IqW(7={h+3WunD;{gfN+_JQYeoj@wa*)C@kZs zz2bk(4HmlZp*e-Vxxq5g6s*FJK^r5oM)QiFMxaWe5OGclH#LO}CF=^;%4PEtEP(&y z5L^mMz%=i%KLOg+B?kfF@*E7L8t5Qx;9? zhbwN0L}$lKwE}K97iL*!&_y38an#4sx1Y^qw)VVvt5CS90jqyZ`0|g_h1G!M><&cB zH5c_>|KoI^WlUZJ=K17vslq#RLWw<|0{lbKqKv}9XyGIf7)N?9n)7(7a4sFsQ5L&QW|IwjMQ(g zcpS6%H;07V`!k>X(wr|Jf*JF^Ljqh%&{KN@P4gi?jl^*)A1#pVWb=wckr8NxvN>1~ zc-hPBLVv`mQuxZ8YVPc^V^+9v`7jUo? zz-BvWDN3H_PJZog=T$;|^o}xL4S<>hzM|_cpz;c4`@j6%v1CGZ1;bf@B?UaxH~K_M zd^shSo_%{kB{zdVD09Q5^xHvNE^Z!Ui?3TeQ5@gk78my{_6OMzPtm`Fj7^96S4hr1 zi(7x~;NVD+tz+Z(z_T-ThVUh3F=TC4#8G;YB9(95de_!yZ9n+V$vw&SWOecXE{X&xDjd*4l(&)gIDo=uw1-W&IxzgIj^7U2G2adpJR>7@C@i0DMUyGLVL zzL_+SkBQsU(TL;Y@rZ{fVjY~Hh3`~314-T=LqAdYe z$A@DNzpzRIvZhe2LU%i6%Qo?Th zxj#&a?YZx77CVKm)g5A{Vs1GdB(qmn&weGC{a!G87cOQhr&s2W1Y%dJ8si^dahPgLt*^7CA^N49a*?(Dx5QnfyJ_E%%=53o)ISZ@SaK@1>VpH-DjWknON zxi8n>^+pZklL6$90>~pb#eO*xWK}VgpdPuYn$cqc>Yn@gqAvh79V%lc)ToY1&@R-V zy%vfvKznh`CDAGiu>|tPHPsBB2_+do?z=e-X)`f6ucDGmAotx|4O!a575$?C^7LgO zD@wrx@^lU4fdF#P_7s=nlLIlEfdePiV*>HyKs8rK0*E6a#JMVnixr#+;JF&W7XrW& zA>i>_Vv7lQnxRHbu#VqS&D`IIA`7wh6sBV}1_f<6efMWmxieRH?J4lQaP#F4w=Q`y z6<$C`*VDL!&d&ny(x+v?jdlW98A#s667#t|52S*GG3h01j0*IMjK z_0+!!jNljm%IP-(s4e6}nQ^pZHE0h9Xs?85C6>g=2(2bma+ngUX6hRO%$X48g<6;y zHw_72FVq0O8~{$?*X@s|NP50CwuLVc1xIIoKKF zI!AtTeKo_+1;|f@$Y+3D4(u~CJT10!#4|OBCj!J*Lc}A3mqVknA(}To$$_&U4sj?y2T>?-v6X4`I(Hu-%bZt3y3U zIa`B*+b&qsD_7BgnwqU;=uI~EWYK*BC&;Ja> z^Zbs$9!}2$QMtoUkWSYieP$n5-VZ{gCq`msNG#~4@Khi^4G7Ubm2^73uR?$K(gJQW~62V}(ZH|s&J zjJX{9%^K|E0ru-5_HjIZtTD)GXBwf9K9kOEPj?+3>&^f6mELh&^mhD1gyyRJg=Dnp3CXq^W$u6l#(i#qsfKgL^kHiUYV#fx$gIQ42l& zY-J`(=KJ!8C#r#W{#)*q&xOF}>j17cCEiL%gD+ISH%(+=knGNp~PC389;^HxaaB*;VsW4xHE)4q!s)5(q1IzJl#AJ`oj z)6@6O06hIL@FKuZ-&YO1dw*a|A@I~D4jdlgWv8DJMrVVePw+C0M zwd`Et4PwQ$Ydr4*SXbp8?FYvRFLaan)z4clQu-y@sJH~a0+foaXo6CwjV3qE^+D7T zLE^jX`Y0sQy2xes2ZgJ>9Z!(z4dB7aW4+VskX?A67ZZpb_Y$6RWdXSCqVZjSOuNqL z_cMuFGd(Cs7^*u6{{NI+%8L4BEKMHNB&Jb!ME+$g4U67cd3hZHto+U;R?TceSnv-H zB7W+P$icr^D3|bcR|plaY+{b<1__Vf;`y^#arxm+JgIJ?(L0|QAg20rshI1qh{}gV zF}HOuHxA{|yQ3H>UN;>dr<>6oy*sMj8sPEK2i_G-d$?3f`R5|>q&%v4_!RTnBa4$sw(w5|YZ`qQo$C|=NqXgAZ3gMJRA>u}5ICrh=wT|&>{ z)ObqWzC5!#)eRH9eM|V5Wqo*AA>FlA$gf7g;m^dR;qlJs)TUEes%T4<&TePJ;E>np zIXZp=pd#a!FFN>Th77;EQN!;J!>`lxC{s-SJE8CkJ{)>2p5@_*_5b641bwL>4B%Id F{vT}G{2u@S literal 0 HcmV?d00001 diff --git a/crates/core/tests/data_err_logs/table_g/_delta_log/00000000000000009999.json b/crates/core/tests/data_err_logs/table_g/_delta_log/00000000000000009999.json new file mode 100644 index 0000000000..c1f8c8b095 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_g/_delta_log/00000000000000009999.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317342605,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":9,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"ba4b200d-e0ac-4715-ad4e-bed8ef1b20cd"}} +{"add":{"path":"part-00001-55fbea2e-0788-438a-a50c-65f809acc05b-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342581,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":90},\"maxValues\":{\"id\":90},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-0d891eaf-0e01-46a6-879e-49bbca90c215-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317342581,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":91},\"maxValues\":{\"id\":91},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-569d12e9-04d5-4fe2-9554-f288047f3386-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342592,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":92},\"maxValues\":{\"id\":92},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-1bc89ec9-8092-49e1-9b1f-123ae50e3d40-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342579,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":93},\"maxValues\":{\"id\":93},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-b4a223d6-1d87-49c9-84c9-a85eece61839-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342591,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":94},\"maxValues\":{\"id\":94},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-4c3b6be7-979c-4f42-8920-efa32b751d97-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342578,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":95},\"maxValues\":{\"id\":95},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-6c224359-8995-417a-8b24-b2e530327bc6-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342581,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":96},\"maxValues\":{\"id\":96},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-2a40eb21-34d2-48ca-aaa5-55db674f56de-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342590,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":97},\"maxValues\":{\"id\":97},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-ca8256ed-98cd-460d-8de2-9f6f7f388703-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342579,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":98},\"maxValues\":{\"id\":98},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-f131fc78-c201-4e8d-b194-222b2e79778d-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342578,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":99},\"maxValues\":{\"id\":99},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_g/_delta_log/_last_checkpoint b/crates/core/tests/data_err_logs/table_g/_delta_log/_last_checkpoint new file mode 100644 index 0000000000..604c2b31a2 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_g/_delta_log/_last_checkpoint @@ -0,0 +1 @@ +{"version":10,"size":102,"sizeInBytes":21615,"numOfAddFiles":100,"checkpointSchema":{"type":"struct","fields":[{"name":"txn","type":{"type":"struct","fields":[{"name":"appId","type":"string","nullable":true,"metadata":{}},{"name":"version","type":"long","nullable":true,"metadata":{}},{"name":"lastUpdated","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"add","type":{"type":"struct","fields":[{"name":"path","type":"string","nullable":true,"metadata":{}},{"name":"partitionValues","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"size","type":"long","nullable":true,"metadata":{}},{"name":"modificationTime","type":"long","nullable":true,"metadata":{}},{"name":"dataChange","type":"boolean","nullable":true,"metadata":{}},{"name":"tags","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"deletionVector","type":{"type":"struct","fields":[{"name":"storageType","type":"string","nullable":true,"metadata":{}},{"name":"pathOrInlineDv","type":"string","nullable":true,"metadata":{}},{"name":"offset","type":"integer","nullable":true,"metadata":{}},{"name":"sizeInBytes","type":"integer","nullable":true,"metadata":{}},{"name":"cardinality","type":"long","nullable":true,"metadata":{}},{"name":"maxRowIndex","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"baseRowId","type":"long","nullable":true,"metadata":{}},{"name":"defaultRowCommitVersion","type":"long","nullable":true,"metadata":{}},{"name":"clusteringProvider","type":"string","nullable":true,"metadata":{}},{"name":"stats","type":"string","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"remove","type":{"type":"struct","fields":[{"name":"path","type":"string","nullable":true,"metadata":{}},{"name":"deletionTimestamp","type":"long","nullable":true,"metadata":{}},{"name":"dataChange","type":"boolean","nullable":true,"metadata":{}},{"name":"extendedFileMetadata","type":"boolean","nullable":true,"metadata":{}},{"name":"partitionValues","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"size","type":"long","nullable":true,"metadata":{}},{"name":"deletionVector","type":{"type":"struct","fields":[{"name":"storageType","type":"string","nullable":true,"metadata":{}},{"name":"pathOrInlineDv","type":"string","nullable":true,"metadata":{}},{"name":"offset","type":"integer","nullable":true,"metadata":{}},{"name":"sizeInBytes","type":"integer","nullable":true,"metadata":{}},{"name":"cardinality","type":"long","nullable":true,"metadata":{}},{"name":"maxRowIndex","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"baseRowId","type":"long","nullable":true,"metadata":{}},{"name":"defaultRowCommitVersion","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"metaData","type":{"type":"struct","fields":[{"name":"id","type":"string","nullable":true,"metadata":{}},{"name":"name","type":"string","nullable":true,"metadata":{}},{"name":"description","type":"string","nullable":true,"metadata":{}},{"name":"format","type":{"type":"struct","fields":[{"name":"provider","type":"string","nullable":true,"metadata":{}},{"name":"options","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"schemaString","type":"string","nullable":true,"metadata":{}},{"name":"partitionColumns","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}},{"name":"configuration","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"createdTime","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"protocol","type":{"type":"struct","fields":[{"name":"minReaderVersion","type":"integer","nullable":true,"metadata":{}},{"name":"minWriterVersion","type":"integer","nullable":true,"metadata":{}},{"name":"readerFeatures","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}},{"name":"writerFeatures","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"domainMetadata","type":{"type":"struct","fields":[{"name":"domain","type":"string","nullable":true,"metadata":{}},{"name":"configuration","type":"string","nullable":true,"metadata":{}},{"name":"removed","type":"boolean","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"checksum":"94a578f92841fa7ba9cdee96b5905fdb"} diff --git a/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000000.json b/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000000.json new file mode 100644 index 0000000000..0316f09771 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1742317262289,"operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","clusterBy":"[]","description":null,"isManaged":"false","properties":"{}"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"c0983a7a-aeca-4ba8-a509-d5dbc71a10de"}} +{"metaData":{"id":"5f54ef5f-e511-4114-b6e5-f6c206c068b6","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1742317261939}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":1}} diff --git a/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000001.json b/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000001.json new file mode 100644 index 0000000000..23002c6f58 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000001.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317316973,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4819"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"a1c49e36-8fcd-46e6-8ede-5b8560f7ec3b"}} +{"add":{"path":"part-00001-665397f5-1435-4478-a598-ca226c99ffcf-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316825,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":0},\"maxValues\":{\"id\":0},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-fd50becf-074e-4a1f-985b-01529e9f7b03-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316820,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":1},\"maxValues\":{\"id\":1},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-4fc1f70f-9daa-46e6-83b5-ea8144d4a96d-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316822,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":2},\"maxValues\":{\"id\":2},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-efb0808c-3b7f-4a4d-bc36-daa91c074b5b-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316825,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":3},\"maxValues\":{\"id\":3},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-c8664e02-01fe-4c2d-8eba-ae84012d7aad-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316825,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":4},\"maxValues\":{\"id\":4},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-7446ef47-3110-4c3f-a2d0-0c71bafc893a-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316820,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":5},\"maxValues\":{\"id\":5},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-ee372cff-6aae-4979-970b-88cc154a31bd-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316822,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":6},\"maxValues\":{\"id\":6},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-9ea59fdf-fc26-4650-a282-9c2cc1906c7c-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316823,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":7},\"maxValues\":{\"id\":7},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-d50ebfbf-e534-4bc8-b63d-437f6029da6e-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316823,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":8},\"maxValues\":{\"id\":8},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-96e67376-3d15-4895-bd5d-5e0a325bcb83-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316823,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":9},\"maxValues\":{\"id\":9},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000002.json b/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000002.json new file mode 100644 index 0000000000..86e3c7470c --- /dev/null +++ b/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000002.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317326453,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":1,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"ac59851b-981e-4e82-96ea-36a543cfe254"}} +{"add":{"path":"part-00001-6491d41d-d498-4a89-a291-92d964035606-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326374,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":10},\"maxValues\":{\"id\":10},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-d359921a-3cb1-454d-8aa0-ac5c830fcdc5-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326373,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":11},\"maxValues\":{\"id\":11},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-b2aae64d-1fab-4106-bc87-2454e945dada-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326373,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":12},\"maxValues\":{\"id\":12},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-cca989ea-d56e-4e1e-a4ba-538ef7801997-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326373,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":13},\"maxValues\":{\"id\":13},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-14706643-f3f0-4ba9-8282-7d55bb4ecacb-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317326374,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":14},\"maxValues\":{\"id\":14},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-25801ed5-1cf2-43fa-bbdb-8898fc102e64-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326373,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":15},\"maxValues\":{\"id\":15},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-03fbeb6f-b8a5-448e-afa7-0f49fca61866-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326374,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":16},\"maxValues\":{\"id\":16},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-08361c76-870e-4ddf-9153-f67852849ec3-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326374,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":17},\"maxValues\":{\"id\":17},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-368e738e-0673-4e76-a1ff-5ba9c755396e-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326373,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":18},\"maxValues\":{\"id\":18},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-2c9a3837-e2c5-42bd-b888-f3205f4b894c-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326374,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":19},\"maxValues\":{\"id\":19},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000003.json b/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000003.json new file mode 100644 index 0000000000..14665bcd27 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000003.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317330682,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":2,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"5e6fdfee-2a82-40d1-bc83-47b3f0f3f21e"}} +{"add":{"path":"part-00001-b7be8377-b715-4234-b316-201fd2c9c142-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330578,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":20},\"maxValues\":{\"id\":20},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-719b1086-b19c-45d1-8c4c-c11db02e2e0b-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330581,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":21},\"maxValues\":{\"id\":21},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-eb0fd03c-3e47-42ed-9897-e79dd1567fb1-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330577,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":22},\"maxValues\":{\"id\":22},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-48b99dee-6d3e-4cfb-b651-4769de7f5b24-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330577,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":23},\"maxValues\":{\"id\":23},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-b873a231-8352-4bac-b6f1-b53ee738d212-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330582,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":24},\"maxValues\":{\"id\":24},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-74ed051c-b116-4947-b62c-2086bcd5bb90-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330577,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":25},\"maxValues\":{\"id\":25},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-680d9e49-afce-4a7d-bca8-b03438c2fd74-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330577,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":26},\"maxValues\":{\"id\":26},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-81c10052-aacc-4ecf-b9cf-64f81b3bd435-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330577,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":27},\"maxValues\":{\"id\":27},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-cf9cac69-932c-43bf-8e4b-fd059d519c0f-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317330577,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":28},\"maxValues\":{\"id\":28},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-43e3252c-8ac7-4c7a-bcb4-15aaf7ae95b9-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330581,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":29},\"maxValues\":{\"id\":29},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000004.json b/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000004.json new file mode 100644 index 0000000000..ed86e283d5 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000004.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317333588,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":3,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"57611e9e-5312-4502-a3ad-c0c78799773e"}} +{"add":{"path":"part-00001-5c92b4bb-af84-4066-8aeb-1e493b7147df-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333524,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":30},\"maxValues\":{\"id\":30},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-b050a084-ab25-420f-bb7b-50eb95d25e4e-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333551,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":31},\"maxValues\":{\"id\":31},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-1880b504-fcf5-4f43-92d4-c43e8dd9d7d9-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333550,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":32},\"maxValues\":{\"id\":32},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-4af9173b-8472-41cd-8772-e2bdb084c5d5-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317333524,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":33},\"maxValues\":{\"id\":33},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-89cec234-f844-4802-a786-5d9133bbe489-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333523,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":34},\"maxValues\":{\"id\":34},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-fa11b282-ec0d-4513-9baf-2b84c5f94a12-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333550,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":35},\"maxValues\":{\"id\":35},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-895723cb-0dba-4019-a2a9-e6db9a937c91-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333550,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":36},\"maxValues\":{\"id\":36},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-dca57e7a-f859-4b39-bc43-03e1061f1b4e-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333551,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":37},\"maxValues\":{\"id\":37},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-96756753-7714-4c07-a238-d5b57f42a8ce-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333523,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":38},\"maxValues\":{\"id\":38},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-9a7b861f-5d9a-41c4-b4ec-7f0d1391acfe-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333551,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":39},\"maxValues\":{\"id\":39},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000005.json b/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000005.json new file mode 100644 index 0000000000..f77fd655fe --- /dev/null +++ b/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000005.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317336099,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":4,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4819"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"203edd74-d2b2-46fe-935a-6222cfb888d4"}} +{"add":{"path":"part-00001-a24fe71c-ba35-47bb-8f3e-636d5991d5ae-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336058,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":40},\"maxValues\":{\"id\":40},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-a56ad50f-bc64-44cb-bb55-e2d177947b3d-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336079,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":41},\"maxValues\":{\"id\":41},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-95c53cf1-b472-4c34-b728-1dd7cbed8b2f-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336074,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":42},\"maxValues\":{\"id\":42},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-7e442ced-e810-44d9-9d28-3027e652a0ec-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336080,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":43},\"maxValues\":{\"id\":43},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-68248457-2fa3-407e-9de3-759b1e052b99-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336075,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":44},\"maxValues\":{\"id\":44},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-89a48ead-5bf3-4d16-aada-97c11386fcaf-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336076,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":45},\"maxValues\":{\"id\":45},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-42b618de-c46c-4888-9b48-b99493ec2983-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336070,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":46},\"maxValues\":{\"id\":46},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-4dc49305-f4f8-4ec9-9a40-8f4b3bd81324-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336055,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":47},\"maxValues\":{\"id\":47},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-d7bb17a9-223e-474b-9d78-2c745cc35a4b-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336054,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":48},\"maxValues\":{\"id\":48},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-68a79bb6-a31e-49bf-848f-2d64ceb834c0-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336079,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":49},\"maxValues\":{\"id\":49},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000006.json b/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000006.json new file mode 100644 index 0000000000..28116ba9f2 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000006.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317338700,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":5,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"9ee4b86b-1bbe-4d6e-adbd-8dd4961989fb"}} +{"add":{"path":"part-00001-a8fc5b00-29e4-4a99-961d-b0cbcc23d165-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338678,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":50},\"maxValues\":{\"id\":50},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-5a3079f8-abbc-4b5f-a1e3-340830e59222-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317338670,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":51},\"maxValues\":{\"id\":51},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-567d7b20-b6ce-4e96-b500-caa34c80f8a7-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338676,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":52},\"maxValues\":{\"id\":52},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-b794df4b-174f-468a-9de7-2aa865ba7014-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338676,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":53},\"maxValues\":{\"id\":53},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-a15406cf-c141-4f7b-b302-e4b5a145cad5-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338675,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":54},\"maxValues\":{\"id\":54},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-8a52349c-d93b-4c59-b493-13486bb5e284-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338680,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":55},\"maxValues\":{\"id\":55},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-1734b4eb-4414-4b3a-8e99-1bd099c9e6b5-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338670,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":56},\"maxValues\":{\"id\":56},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-02781f2a-6c34-42ca-80a4-e830b2eeb963-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338676,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":57},\"maxValues\":{\"id\":57},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-63ec1c21-c31c-43d4-b5c9-9c206aeeb280-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338670,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":58},\"maxValues\":{\"id\":58},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-b732e8e4-7d1b-470d-89a5-86a3f8d8bdc2-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338680,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":59},\"maxValues\":{\"id\":59},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000007.json b/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000007.json new file mode 100644 index 0000000000..956c8b508c --- /dev/null +++ b/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000007.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317339658,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":6,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4819"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"63c885b0-74eb-4075-a02a-a43b8202b3f8"}} +{"add":{"path":"part-00001-f697bc51-b77f-4234-938a-5f85478cedec-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339621,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":60},\"maxValues\":{\"id\":60},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-839f210e-cf84-4c5c-b185-fd2fe2b5ee6f-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339491,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":61},\"maxValues\":{\"id\":61},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-9ebe1c22-87a1-4f37-a695-77658c3e70a8-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339491,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":62},\"maxValues\":{\"id\":62},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-01b2687c-45e4-484c-b1d6-80e06b5b5d11-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339490,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":63},\"maxValues\":{\"id\":63},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-05662cc4-6a79-4204-aec1-2311a44d8c74-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339511,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":64},\"maxValues\":{\"id\":64},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-67f1b806-ef5f-4f8a-890b-b3b5ad1d234c-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339490,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":65},\"maxValues\":{\"id\":65},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-37318455-4128-4e1e-9ab7-5c587ac9fde0-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339621,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":66},\"maxValues\":{\"id\":66},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-c5d68934-1f5a-40c4-b5be-1233eb15378a-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339511,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":67},\"maxValues\":{\"id\":67},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-41e40903-13b6-4465-aa3c-bd8cb5e52b18-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339621,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":68},\"maxValues\":{\"id\":68},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-110c626e-ea13-4204-8cae-a3183d89a4b7-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339621,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":69},\"maxValues\":{\"id\":69},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000008.json b/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000008.json new file mode 100644 index 0000000000..ea216c1556 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000008.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317340794,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":7,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"b5902ae9-2aab-46f9-82f9-d68ae45940d7"}} +{"add":{"path":"part-00001-3b62f1d4-2a3e-4611-a55b-e9d2ace11b3c-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340752,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":70},\"maxValues\":{\"id\":70},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-9c066923-23f4-45f6-b2af-5a4ecbef1707-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340776,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":71},\"maxValues\":{\"id\":71},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-cfb48b6e-0fc7-4d6b-8ab7-c52f29f71b94-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340752,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":72},\"maxValues\":{\"id\":72},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-b21a45a5-9a53-4dfa-8327-8a82b6b283e9-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317340766,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":73},\"maxValues\":{\"id\":73},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-afa450c4-f649-4c88-817a-6d0bdfc4da6f-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340765,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":74},\"maxValues\":{\"id\":74},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-dd96cd25-394d-4873-84e7-f2f6b0eb5a67-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340776,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":75},\"maxValues\":{\"id\":75},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-3a98b7ed-8665-4bc5-8704-6745f7084cd0-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340776,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":76},\"maxValues\":{\"id\":76},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-8dfd3dc5-cf31-42fc-8c55-2ac70ce9e18d-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340782,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":77},\"maxValues\":{\"id\":77},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-97e7d287-aee3-445d-a90e-f3b2ef4bd7cd-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340765,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":78},\"maxValues\":{\"id\":78},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-db2340aa-28ff-4826-b39e-07ba516551e9-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340779,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":79},\"maxValues\":{\"id\":79},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000009.json b/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000009.json new file mode 100644 index 0000000000..1f9d5fb05b --- /dev/null +++ b/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000009.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317341714,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":8,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4819"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"5379512e-d4d3-42b5-817d-70ecf05f2385"}} +{"add":{"path":"part-00001-4e7175fd-6ffb-4b6a-946c-43aa7c439104-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341702,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":80},\"maxValues\":{\"id\":80},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-f3cc4f07-93ec-4a47-add1-b16c1149c3d9-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341667,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":81},\"maxValues\":{\"id\":81},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-07581f2d-ee98-4464-a28b-f738e88749e4-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341702,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":82},\"maxValues\":{\"id\":82},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-c9d7468d-fc14-445c-8dbd-65d616f8eb05-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341666,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":83},\"maxValues\":{\"id\":83},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-56eb2260-fb49-4138-a5c8-f0ae0949f4e2-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341670,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":84},\"maxValues\":{\"id\":84},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-5d15eeca-0fca-4986-a18e-4d86bf5ba2f6-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341670,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":85},\"maxValues\":{\"id\":85},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-3e13d733-f55d-42ac-be4f-f4400e999c29-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341702,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":86},\"maxValues\":{\"id\":86},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-22c23f1e-26d2-488e-8e07-2de6ae5fded5-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341667,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":87},\"maxValues\":{\"id\":87},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-d0debf09-5f82-4c61-8636-27e51fba37e5-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341674,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":88},\"maxValues\":{\"id\":88},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-7403e0c4-bb07-4a1b-9fca-a01523713f85-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341702,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":89},\"maxValues\":{\"id\":89},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000010.checkpoint.parquet b/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000010.checkpoint.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9df253f2f762891c53763e62e57576217a8b289a GIT binary patch literal 21615 zcmeHve{@vWedn9eNJxOjGSQn1ss0cYt8$MXeZSv*J~}DCYRg2%@)&#F)J}Zgd-s8C z{i-xzpbN&@WK1Zf3Lz{>Xcs&FL5b`75Z6szH)~Rs^*WB%OX8GzecHfw-NyBYj6pooH)6&Hv<{p$ON=68rMrSQ2) zY!-w$`36A{UP=i+lL1qsVriLp{0A?WQk6tru+9`fOL!!eZa8>2pI(Xf7#%N4vMejI z6$9Zxb1&s0pGFh|C){f0z+svEMa zOTMYw5>`PZqF7WiJ=yVmSNEvtOWSq}`z?u3L)CQKl{_09fR-&ex~)lyrW4a~45kuY z+H!-iob2#zm%64c*_!G~y5{&2VY(yvo^05jq1dkMPrj476RTyGVi`Ud^L=1CCY5a6 zOx@B+spaaLt;mz0E$fo3II2l3pcxDc!crwi@l1)xOg0_EF+4@#Lv@Mykc3QJNpmb8 z64O1&aWzx&D76(=wsee^wsfQx9qv{PT{c}`f{d2m-)A|Jqsb5^MomvLRFGOQMbnBT;2vavab%Waik0r(%t4(oN}xL~NJ25WD0PebM25%%v7FCBw57 zO>-Qk6MHB0?dVihHONEp2uOn~mZ%Xhbum(Wgc^}mMKWX z)g04dmSkHnUftK>X*Qgg`BYOK%TPSKM4RrkB-8R0hsY+rF)szB$>E;-SHF;q0;@`MRGiG}D_2)n$qJSia*CMN=nf z(|-7Xi6Ce@4q!AyJ>BNMuM=NVJyUm?Ll7ysPfuZ)h)uo))3@Q82*wQDY7Q)d5=GZN zo!aK)+J^nGFdNeLR2VkQ3G;Nrlc09MW^BTAS=_G|`8Q9rwLZa=kn+(G1aWKGwItNNaWn3uYt z8=+p;U^v9Kvwt|Z9d<+u-ACWvayW$$uNp*F7;=c>`k<;oehxB=M2JnGEm*7>g!pzSMKj4MuN{_XT3UAQ zTzXpfeb_IxUCS^u+bngYIyA{f%G2Caf8LCQb+mgGG-M$oDjrnkFb@kyY#^YgTe4}w zhkVVKv-6r)K|Csf`*?=oz@3mz_FTy9P%WbPDzcM{h^VWGtwg2>`B(%j0kPB6CJ(28 zCmW`zx=3p#tT8*%vJ9aXQA$GagMsKCad#e1uSJkz)Ic_dK3&y>4H^(i>eX&&)pZpc z$-$a@yQvrUVpGc@@NFJ>X||;`qf3hx^hlO3dkWGNb^X%e)RZn+Ob1`6Fc=SwpadE) zT?vWBVy2-|nZaJ*tP*myNo=e`@eOd8+TCk7Oj1-$V-8%^B6J%v07(0arueQ!Bv++t zn;G#=J&1&;sw$Rj*_K5ncQs}^uFCt4P8^dRJ=B4i=b6x2_UqYh*HC@c_ASM+cegg= zb%p7&EoXPM-VGnsv0_9T4S4_}K?*liMb!QfyY6-)=Kdc2eiIT$`CfJSR{wLr??;LW8Tq z3>nf3Wds>U^(;zDZYpm{K2;QF`Clw!E($c=P+&$5j9hiHpSfzjZ|ijDWAmmE3|(7w zbjN|mA(bMPBY;q(X)F;HiUphUO4nw35bibGR}};dxABIIYh;JAI&1{%@KHD+L7K=X z%V2Q|Ok750q==*luER?l8|yNW36$(tnlrjq`fj66Bm^N#bz$xBx1(+C7F2A>5AEuN z$rw7qo`clQZAMXK*Hlf0h^A^72BHc| zHUxbSxk{y!nD8b`Rwi2;xV@+<>Hr@xRW-d+W*cD;-qFn+9z&8v1$A`mmiyD)usKim znB&WKc4+?5kM)^II3}teSf8Qz4%I9M8KXS0pnJqlOVX4F{-WR2d?a{JmSh)}LVXvO zIXRXYKwOhGq#Tq92#^RsI`UNZyH_J`$iAMPx~dJOg=xZKvcJ}{jM~JpVEgF7NTlZM z;Jgm_prsqWmEE^M@JiQYS7{QePn3eNI*ow=U0slF*KLGi%l144MYiuuem%P%86GvM z;^~SfdB{wts0c4ZPh zDU|vy?8H_zgaRK%WB5qTDvZmZh%yfI6-zE1Pj`SCGKNC3UuqHb?C&qgzBISjhJ#6t zZNs`yli8&7t@+)se+_ZQheT6)cFlrSNK#k?(!Gh)jY3TGO8;eU8;S$VK-!#-z`z!{ z&AO?dIwqqcM%Bfz?Lb0CuH)WrBM({{GPmYIeW=6Wnn*kblToc0j_Z`*oyt+FYbrw~ zLWD$_sash&hav*?qDfI^BCvWc5;FZt)>_c+BL_1B3hHxQEWp&@RGQ_RvWm>R^Lz`W zVo*)CY@eVYbzu4q>L02g*J{XYn#>GaRrl-`c0jDy9B^BNXmym*FjfR6)rE##SAz^4 z#MM01ui2KL?Vr2+cpr)}lzvd3ixSmzZQJ*=?d_B1oNnyI9CXQ&?__@{W)v-T12!O% zFHCc7xV-S!lf*^;ay)4^DWuWq72q%tR`U(Os z_6zW;?5%A&ycx+Abu7Y~!i$9tc(BYgPcI>oH;J9FF^|D^y+v-1i+zz}qR7=P^VG*P z$Q7Hj+emuKl05i;i@k!RA%vhruryR!2pv9ZHV4InL3!zRH{{?ss9j|gSt>!)(g{)> ziq>pyL8n6v#WW0sAzLH+Dv$tnw2+1CFM0<7@2J=VIp`X)jt5omypZXESWycYjx0;+ z1OGIDG7a(1gpWG1>$r$fs9sCQ<_Ox+IYo+k!9#+hFj17oD2PzRsVG30jUp~9e+03p z9ccsm!lQ@4DKZjv0$4A$jVh{U$Ay+18&wQ<4X^r8bD^k^N(LyUr9LC z*p})R8kaZs3Js}t0bo;2;x`+_CtrW?$Bn(B!?AZXESdPv`r$7ESbx5yIrHEDrlnbM zTbncOPqegUPJbI+cfEiv(kf>@e-K?wFX5BZN-}?UPfMGiwRQ_jTbqU7XdMtff}i)d zZWs2o_MtDIx$2pgWx`17m@tk(U&R0OTRVjf_@Zm7MHb#@J(T(1e-4Q6x9$)=g(=%w zW#Jn9KNs!$TE8LuE!u6Zd10WH2zy#L3Eyk&%>1`+x7;nf2|DWmcMhFRt;>Y>THA!} zXwPfS_ul(LxEAyI@FxWIjgdrI2Qz@#ZUs={Ue--WMd$_0n3*@Ty z8^5!v<%_59_~^dD|9WiUUHdxsKe+WD=J?CIuXFGC{EES)3zW_SZ3hm0>CwK&9{ZOs z?RfCyQ@{V-)c^RWbl$2x;dpIGtFm3qDfI{v-IBO8Vb%pD!~CKg|(^ei48 z9J!qiZD9EHdy5CX#n;(nD|+ds+po_FOGY*f4XqsAFj8FH+S4p=-Ma5Kq1jNkuD(rZ z%)N1&kTK0Qy!qyBLg8@KrD5!))tJK= z;pRP$Hx$+iDRIrM!j~UyD4a(>r}St;G|4VJo=OFLXiCh-T8!f0c9}THL~ICJ;X?GS zQ8*W(2zt3MV_9o%4fz2Rk7UX)Uu_IqW(7={h+3WunD;{gfN+_JQYeoj@wa*)C@kZs zz2bk(4HmlZp*e-Vxxq5g6s*FJK^r5oM)QiFMxaWe5OGclH#LO}CF=^;%4PEtEP(&y z5L^mMz%=i%KLOg+B?kfF@*E7L8t5Qx;9? zhbwN0L}$lKwE}K97iL*!&_y38an#4sx1Y^qw)VVvt5CS90jqyZ`0|g_h1G!M><&cB zH5c_>|KoI^WlUZJ=K17vslq#RLWw<|0{lbKqKv}9XyGIf7)N?9n)7(7a4sFsQ5L&QW|IwjMQ(g zcpS6%H;07V`!k>X(wr|Jf*JF^Ljqh%&{KN@P4gi?jl^*)A1#pVWb=wckr8NxvN>1~ zc-hPBLVv`mQuxZ8YVPc^V^+9v`7jUo? zz-BvWDN3H_PJZog=T$;|^o}xL4S<>hzM|_cpz;c4`@j6%v1CGZ1;bf@B?UaxH~K_M zd^shSo_%{kB{zdVD09Q5^xHvNE^Z!Ui?3TeQ5@gk78my{_6OMzPtm`Fj7^96S4hr1 zi(7x~;NVD+tz+Z(z_T-ThVUh3F=TC4#8G;YB9(95de_!yZ9n+V$vw&SWOecXE{X&xDjd*4l(&)gIDo=uw1-W&IxzgIj^7U2G2adpJR>7@C@i0DMUyGLVL zzL_+SkBQsU(TL;Y@rZ{fVjY~Hh3`~314-T=LqAdYe z$A@DNzpzRIvZhe2LU%i6%Qo?Th zxj#&a?YZx77CVKm)g5A{Vs1GdB(qmn&weGC{a!G87cOQhr&s2W1Y%dJ8si^dahPgLt*^7CA^N49a*?(Dx5QnfyJ_E%%=53o)ISZ@SaK@1>VpH-DjWknON zxi8n>^+pZklL6$90>~pb#eO*xWK}VgpdPuYn$cqc>Yn@gqAvh79V%lc)ToY1&@R-V zy%vfvKznh`CDAGiu>|tPHPsBB2_+do?z=e-X)`f6ucDGmAotx|4O!a575$?C^7LgO zD@wrx@^lU4fdF#P_7s=nlLIlEfdePiV*>HyKs8rK0*E6a#JMVnixr#+;JF&W7XrW& zA>i>_Vv7lQnxRHbu#VqS&D`IIA`7wh6sBV}1_f<6efMWmxieRH?J4lQaP#F4w=Q`y z6<$C`*VDL!&d&ny(x+v?jdlW98A#s667#t|52S*GG3h01j0*IMjK z_0+!!jNljm%IP-(s4e6}nQ^pZHE0h9Xs?85C6>g=2(2bma+ngUX6hRO%$X48g<6;y zHw_72FVq0O8~{$?*X@s|NP50CwuLVc1xIIoKKF zI!AtTeKo_+1;|f@$Y+3D4(u~CJT10!#4|OBCj!J*Lc}A3mqVknA(}To$$_&U4sj?y2T>?-v6X4`I(Hu-%bZt3y3U zIa`B*+b&qsD_7BgnwqU;=uI~EWYK*BC&;Ja> z^Zbs$9!}2$QMtoUkWSYieP$n5-VZ{gCq`msNG#~4@Khi^4G7Ubm2^73uR?$K(gJQW~62V}(ZH|s&J zjJX{9%^K|E0ru-5_HjIZtTD)GXBwf9K9kOEPj?+3>&^f6mELh&^mhD1gyyRJg=Dnp3CXq^W$u6l#(i#qsfKgL^kHiUYV#fx$gIQ42l& zY-J`(=KJ!8C#r#W{#)*q&xOF}>j17cCEiL%gD+ISH%(+=knGNp~PC389;^HxaaB*;VsW4xHE)4q!s)5(q1IzJl#AJ`oj z)6@6O06hIL@FKuZ-&YO1dw*a|A@I~D4jdlgWv8DJMrVVePw+C0M zwd`Et4PwQ$Ydr4*SXbp8?FYvRFLaan)z4clQu-y@sJH~a0+foaXo6CwjV3qE^+D7T zLE^jX`Y0sQy2xes2ZgJ>9Z!(z4dB7aW4+VskX?A67ZZpb_Y$6RWdXSCqVZjSOuNqL z_cMuFGd(Cs7^*u6{{NI+%8L4BEKMHNB&Jb!ME+$g4U67cd3hZHto+U;R?TceSnv-H zB7W+P$icr^D3|bcR|plaY+{b<1__Vf;`y^#arxm+JgIJ?(L0|QAg20rshI1qh{}gV zF}HOuHxA{|yQ3H>UN;>dr<>6oy*sMj8sPEK2i_G-d$?3f`R5|>q&%v4_!RTnBa4$sw(w5|YZ`qQo$C|=NqXgAZ3gMJRA>u}5ICrh=wT|&>{ z)ObqWzC5!#)eRH9eM|V5Wqo*AA>FlA$gf7g;m^dR;qlJs)TUEes%T4<&TePJ;E>np zIXZp=pd#a!FFN>Th77;EQN!;J!>`lxC{s-SJE8CkJ{)>2p5@_*_5b641bwL>4B%Id F{vT}G{2u@S literal 0 HcmV?d00001 diff --git a/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000010.json b/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000010.json new file mode 100644 index 0000000000..c1f8c8b095 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000010.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317342605,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":9,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"ba4b200d-e0ac-4715-ad4e-bed8ef1b20cd"}} +{"add":{"path":"part-00001-55fbea2e-0788-438a-a50c-65f809acc05b-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342581,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":90},\"maxValues\":{\"id\":90},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-0d891eaf-0e01-46a6-879e-49bbca90c215-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317342581,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":91},\"maxValues\":{\"id\":91},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-569d12e9-04d5-4fe2-9554-f288047f3386-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342592,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":92},\"maxValues\":{\"id\":92},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-1bc89ec9-8092-49e1-9b1f-123ae50e3d40-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342579,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":93},\"maxValues\":{\"id\":93},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-b4a223d6-1d87-49c9-84c9-a85eece61839-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342591,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":94},\"maxValues\":{\"id\":94},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-4c3b6be7-979c-4f42-8920-efa32b751d97-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342578,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":95},\"maxValues\":{\"id\":95},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-6c224359-8995-417a-8b24-b2e530327bc6-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342581,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":96},\"maxValues\":{\"id\":96},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-2a40eb21-34d2-48ca-aaa5-55db674f56de-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342590,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":97},\"maxValues\":{\"id\":97},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-ca8256ed-98cd-460d-8de2-9f6f7f388703-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342579,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":98},\"maxValues\":{\"id\":98},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-f131fc78-c201-4e8d-b194-222b2e79778d-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342578,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":99},\"maxValues\":{\"id\":99},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000011.json b/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000011.json new file mode 100644 index 0000000000..05863f6cd0 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000011.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317349152,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":10,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"1657fc30-879d-4b0b-972a-4e3a079fdd7a"}} +{"add":{"path":"part-00001-ceaadd5e-615b-455d-8f4b-052b9c94c7b6-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349136,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":100},\"maxValues\":{\"id\":100},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-b7dba1e7-b1e5-4f02-a223-69ec7353ab45-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349123,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":101},\"maxValues\":{\"id\":101},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-f099cf4d-d418-4852-8580-091908847a66-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317349122,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":102},\"maxValues\":{\"id\":102},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-418e8d25-7316-442a-9bc8-616ed01231eb-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349136,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":103},\"maxValues\":{\"id\":103},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-7538a9c2-1ccb-4150-b162-ef8d826fe30f-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349123,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":104},\"maxValues\":{\"id\":104},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-7619f42c-5bc4-4e77-b037-f36481c8b63c-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349123,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":105},\"maxValues\":{\"id\":105},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-eb49d85f-91cc-4293-9339-a664ee905b0f-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349134,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":106},\"maxValues\":{\"id\":106},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-81e22719-7705-4703-b2dd-c4e2982217a7-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349122,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":107},\"maxValues\":{\"id\":107},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-883858d1-9df6-4b55-a2be-5b8387134617-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349122,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":108},\"maxValues\":{\"id\":108},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-07d91938-ac89-48cc-a657-6067d2d9f67e-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349137,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":109},\"maxValues\":{\"id\":109},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000012.json b/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000012.json new file mode 100644 index 0000000000..4cc44fa8e8 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000012.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317349950,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":11,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"9a035bdd-f892-4449-9c39-401f31fcada6"}} +{"add":{"path":"part-00001-f3b19100-b5b3-4e72-8658-7a937e9ed515-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349924,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":110},\"maxValues\":{\"id\":110},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-54f2324a-e97f-4def-9101-9cc10599ba06-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349919,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":111},\"maxValues\":{\"id\":111},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-3f7ca40a-6497-4208-8a1a-11062456a5a9-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349936,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":112},\"maxValues\":{\"id\":112},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-a14852b2-c743-4a4a-b9c1-0c9472c51699-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349929,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":113},\"maxValues\":{\"id\":113},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-97d06207-5584-43df-afc2-2d1738d79193-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349943,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":114},\"maxValues\":{\"id\":114},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-0d431f03-6dbf-40e7-96fc-b1ebbbe9fc65-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349922,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":115},\"maxValues\":{\"id\":115},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-af0f0232-33c8-4315-821b-8bb1323b7a26-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317349936,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":116},\"maxValues\":{\"id\":116},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-4f744428-d088-497e-afd3-0b374e453e7c-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349936,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":117},\"maxValues\":{\"id\":117},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-694064b8-137e-45cd-b2ea-e28af172a2dc-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349918,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":118},\"maxValues\":{\"id\":118},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-56073753-4c1c-4a68-9b4a-13ef5d1a75fb-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349938,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":119},\"maxValues\":{\"id\":119},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000013.json b/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000013.json new file mode 100644 index 0000000000..b2d03d3ead --- /dev/null +++ b/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000000013.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317350712,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":12,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4819"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"c3cd0fa1-9c72-4344-8225-0b787e52d5e0"}} +{"add":{"path":"part-00001-7a0d95f8-e122-4cf6-b89c-389036a9b415-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350696,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":120},\"maxValues\":{\"id\":120},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-f1f035c1-bf0f-485c-950d-c81d0d2aa8a2-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350706,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":121},\"maxValues\":{\"id\":121},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-d7a51e45-70f3-4379-819b-341951abefff-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350691,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":122},\"maxValues\":{\"id\":122},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-4828722c-5799-4be1-ace1-14bd7f477dbf-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350691,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":123},\"maxValues\":{\"id\":123},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-f3c3c72e-5d71-4dc9-9e15-342f1d6cb6cc-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350701,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":124},\"maxValues\":{\"id\":124},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-e8d74ede-8876-4f55-8e9f-1bbde0d07a35-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350696,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":125},\"maxValues\":{\"id\":125},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-91052146-2292-45c3-b57e-1fd2dd6be6ed-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350692,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":126},\"maxValues\":{\"id\":126},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-9d203964-9f4c-4c84-ad77-9ba305bb6572-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350706,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":127},\"maxValues\":{\"id\":127},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-8362228b-acf6-4937-875b-26c013c342e1-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350690,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":128},\"maxValues\":{\"id\":128},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-6f57658e-1953-4b59-b504-27c9e8c5cc3b-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350677,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":129},\"maxValues\":{\"id\":129},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000009999.checkpoint.parquet b/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000009999.checkpoint.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9fd1755fd595e3c7143926272b940120fd45856d GIT binary patch literal 10569 zcmeHNeQXrR72jJQ=dZ)q!L#gvaj6roj$}JF*w|R7s&P z?(E*&oqgb0tX z_OgmpksrJ?|8j*3SwfMELRXX3otRo&33g~M9Z3fk|#``bYl*+4>*g~tx*!fzW$ z13CBFhX<>26etA+`jynT1~da?;2e(peD2>Yqh+Th5hWEf=1mWQ-#?taJ(QDGJx_zg z7aELx{aQnJL)u6UM2v=}hN!N^)O17l*I#HzC~;TQ}6 zq}J3B*9=8f45gv_g#!n^alq1V_=CaUudr>+wt;%9p45|n{Osdjm)K)$Uy8!X@RyJO z$RnVoPr=W%?_B&3vmu*F6jp;NsunYpmZ+u}11YT~0;RRnma5tQzS+H^3pql%j-xPQ22N=jD}MjD@#{(dc? z?nuP;0l|W)>@HshYCHj1fL|g8B(L!4O19Oo!301*Xlhj zvgTk38*s*H-f)_eUPdm90OBBm;th>V{Nn0A_@%h}k=%h}lRDmHf3X^yR6y_cNk(Q4K^#SkMkZUIAIm&<%h-rNjobG!cooy$NVLymI-o+P-h@#ctv?!6ndR zi}iz`W%RAxgNk_5`nNSxr|Ni8)uTErPGEmhk3+WrBP#Hl?Xf*dqEAEJsvwhDNxO~mwswq>vVN(5zm$!IjK8Ss?=<0?^)t%;s} zhL$$fsf;KoRZl1}-PnhMy@t}aL~}*CJglU(UCHORs;1{xspkAa*JH_eTsNMw=Y_f} zZ74?C(iv3nrL=f*uSTTYsiE^&WBUfZ2iSOjt_#7ZqFk#zZ@@R0rarF6v@coT#c1UJ zc6z~S=Je(>W~u)C#A1QEy6A}nHOuhiu?3=-RGSkg>TEHhXS{ffu_F^ZDpf5VN$LF< z=JxChvM-uU#TCO|TV?&J_UVeo~Kj3i@Fk)zAGI<6;nX$p9UEht>&*!|U%ZaCc-MW7c9eB3;j6>StE zpQBuNgH)4oMNimE&rZ}nv)gtXx-#2M_8Ux$rDirz1<4zz3*6ciEUP2_Ku4KR2sC2$ zH?uy0*HX9C9V`oNg0oN^oVmb7f*>7iBel{r9B+kS4;&#~Kznmw&>jvtg3ymBbbmI~ zy^~OQND2k_bMO-gAN>qUJ_V;&rZen1bhlx0PkA`xnKmLxCn@q8MXm>9drJVwCoX3% z!aayG_Da3MYdqrxWPF!0o)%G;A{fmD!#GcioR)LQd5v-oV%flUE!v0qkl8m_!Xa-X z^2j>$%oz@uvpX`DGd#=424I<>EH{8faLIRVrVM|B$B!fa9K}z1+5B{&o4MWBz3Wmw zobE|4XVqEc`+)MzFU-g7){JtVr<_8{?|l{>bpmQGqG@726|G&KQJq-EX*Z8l$0^m! z!c^{W1jaYR^9^nUzBede#>ZwqpK9(e491u7ar%uS-}{tr8u(lhN_Wr#fj-UCOd!p{ z2K3l5DEJhnaYgqwX&VgY^mzk`UZq4=uyW*zE@x9@=3L?7?;-pQg^xqRa>EOK3^B5C zK8IaJviB(2tULQbq%Nc1P7FHBLl3nB^d^OlL7mi2Le?i%b{-ll=X~%Af`9xVI&&I} ztdKt=6nQ`Om_8SHvP(#I zm68orvM{mKDKg0pRdO0$N7A8(&|On^K*~kp6i<7&189FqX)|~`mOTqjs>P&~@pC$5 z5d0?!o(3>C|6KG;^XQX^o}=hdD1}<9$qpVoDHK?jqsuux&LPvG_2`?+i!pHyxy*CD zk6gzn*ErPgEt|S5wTj8z<5iq`H<9vnN;$hIWuf#v%d-w`0@llvbqvbWcKXh*D4)T{ zR&Y8VL$=$L?b4!bd6W1hp7JzOz7oV)IaZ87FC8Vj?MjHJKRZRY2o8C4oeqvu63GYr#Q;AQ`#9kzcR#X#jF9|vg^mJ&u#M*O1s@ix`kk- z7IntY*st-z?DJuP+?1*1Y?w#ZQxBsj=I(^mm8bBddyeNF>;m2?$~y^{@Hv5(b^fG| zfpXhTPjHfi4o^Z+!p(7nb&??A-`ZbTMBy3Lrz$+*%)4ndAP3x|5^1B5xP zkR7g-8J5BT4Y@8!Tg2hbAmU^TX%T`mg+yf4bC`%36oL0X=D#Syd2)-$pokeN;`kcJk#<3e zD-Pwx^d4zX@c5cg_N7wx69~_`a>{<|R^}usGyDj7LI@7km_fn4F1%@1zeLH)RQgd zrR1~p+5{fxB}%h(L)&b+S*|y4Rhq3kmUQ#RmiCs`=G{*`x^s8$jwc1V#vIrJ7Z+Nr z1*^F7gOs@0ByP@6EXdnzN%7P=>n3JhTT9!9Su2aM9ZCxANS~sg5;m(ci uTBKw1##Xhfvo)eBTHB^Jt-ZBNRl~}|Q22veo%{S(n1KKPvJ?Ij<$nRQB4*(L literal 0 HcmV?d00001 diff --git a/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000009999.json b/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000009999.json new file mode 100644 index 0000000000..d78fe64124 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_h/_delta_log/00000000000000009999.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1742252106605,"engineInfo":"Kernel-3.4.0-SNAPSHOT/test-engine","operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]"},"isBlindAppend":true,"txnId":"138db460-c7d3-4d13-972e-0f4e35b58b43","operationMetrics":{}}} +{"metaData":{"id":"ebffcf7b-bf25-427f-a91a-d4a0d27ebbef","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"createdTime":1742252106422,"configuration":{"delta.feature.catalogowned":"supported"}}} +{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["catalogOwned"],"writerFeatures":["catalogOwned","invariants","appendOnly"]}} diff --git a/crates/core/tests/data_err_logs/table_h/_delta_log/_last_checkpoint b/crates/core/tests/data_err_logs/table_h/_delta_log/_last_checkpoint new file mode 100644 index 0000000000..604c2b31a2 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_h/_delta_log/_last_checkpoint @@ -0,0 +1 @@ +{"version":10,"size":102,"sizeInBytes":21615,"numOfAddFiles":100,"checkpointSchema":{"type":"struct","fields":[{"name":"txn","type":{"type":"struct","fields":[{"name":"appId","type":"string","nullable":true,"metadata":{}},{"name":"version","type":"long","nullable":true,"metadata":{}},{"name":"lastUpdated","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"add","type":{"type":"struct","fields":[{"name":"path","type":"string","nullable":true,"metadata":{}},{"name":"partitionValues","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"size","type":"long","nullable":true,"metadata":{}},{"name":"modificationTime","type":"long","nullable":true,"metadata":{}},{"name":"dataChange","type":"boolean","nullable":true,"metadata":{}},{"name":"tags","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"deletionVector","type":{"type":"struct","fields":[{"name":"storageType","type":"string","nullable":true,"metadata":{}},{"name":"pathOrInlineDv","type":"string","nullable":true,"metadata":{}},{"name":"offset","type":"integer","nullable":true,"metadata":{}},{"name":"sizeInBytes","type":"integer","nullable":true,"metadata":{}},{"name":"cardinality","type":"long","nullable":true,"metadata":{}},{"name":"maxRowIndex","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"baseRowId","type":"long","nullable":true,"metadata":{}},{"name":"defaultRowCommitVersion","type":"long","nullable":true,"metadata":{}},{"name":"clusteringProvider","type":"string","nullable":true,"metadata":{}},{"name":"stats","type":"string","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"remove","type":{"type":"struct","fields":[{"name":"path","type":"string","nullable":true,"metadata":{}},{"name":"deletionTimestamp","type":"long","nullable":true,"metadata":{}},{"name":"dataChange","type":"boolean","nullable":true,"metadata":{}},{"name":"extendedFileMetadata","type":"boolean","nullable":true,"metadata":{}},{"name":"partitionValues","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"size","type":"long","nullable":true,"metadata":{}},{"name":"deletionVector","type":{"type":"struct","fields":[{"name":"storageType","type":"string","nullable":true,"metadata":{}},{"name":"pathOrInlineDv","type":"string","nullable":true,"metadata":{}},{"name":"offset","type":"integer","nullable":true,"metadata":{}},{"name":"sizeInBytes","type":"integer","nullable":true,"metadata":{}},{"name":"cardinality","type":"long","nullable":true,"metadata":{}},{"name":"maxRowIndex","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"baseRowId","type":"long","nullable":true,"metadata":{}},{"name":"defaultRowCommitVersion","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"metaData","type":{"type":"struct","fields":[{"name":"id","type":"string","nullable":true,"metadata":{}},{"name":"name","type":"string","nullable":true,"metadata":{}},{"name":"description","type":"string","nullable":true,"metadata":{}},{"name":"format","type":{"type":"struct","fields":[{"name":"provider","type":"string","nullable":true,"metadata":{}},{"name":"options","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"schemaString","type":"string","nullable":true,"metadata":{}},{"name":"partitionColumns","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}},{"name":"configuration","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"createdTime","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"protocol","type":{"type":"struct","fields":[{"name":"minReaderVersion","type":"integer","nullable":true,"metadata":{}},{"name":"minWriterVersion","type":"integer","nullable":true,"metadata":{}},{"name":"readerFeatures","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}},{"name":"writerFeatures","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"domainMetadata","type":{"type":"struct","fields":[{"name":"domain","type":"string","nullable":true,"metadata":{}},{"name":"configuration","type":"string","nullable":true,"metadata":{}},{"name":"removed","type":"boolean","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"checksum":"94a578f92841fa7ba9cdee96b5905fdb"} diff --git a/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000000.json b/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000000.json new file mode 100644 index 0000000000..0316f09771 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1742317262289,"operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]","clusterBy":"[]","description":null,"isManaged":"false","properties":"{}"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"c0983a7a-aeca-4ba8-a509-d5dbc71a10de"}} +{"metaData":{"id":"5f54ef5f-e511-4114-b6e5-f6c206c068b6","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1742317261939}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":1}} diff --git a/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000001.json b/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000001.json new file mode 100644 index 0000000000..23002c6f58 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000001.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317316973,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4819"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"a1c49e36-8fcd-46e6-8ede-5b8560f7ec3b"}} +{"add":{"path":"part-00001-665397f5-1435-4478-a598-ca226c99ffcf-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316825,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":0},\"maxValues\":{\"id\":0},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-fd50becf-074e-4a1f-985b-01529e9f7b03-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316820,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":1},\"maxValues\":{\"id\":1},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-4fc1f70f-9daa-46e6-83b5-ea8144d4a96d-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316822,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":2},\"maxValues\":{\"id\":2},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-efb0808c-3b7f-4a4d-bc36-daa91c074b5b-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316825,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":3},\"maxValues\":{\"id\":3},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-c8664e02-01fe-4c2d-8eba-ae84012d7aad-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316825,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":4},\"maxValues\":{\"id\":4},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-7446ef47-3110-4c3f-a2d0-0c71bafc893a-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316820,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":5},\"maxValues\":{\"id\":5},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-ee372cff-6aae-4979-970b-88cc154a31bd-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316822,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":6},\"maxValues\":{\"id\":6},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-9ea59fdf-fc26-4650-a282-9c2cc1906c7c-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316823,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":7},\"maxValues\":{\"id\":7},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-d50ebfbf-e534-4bc8-b63d-437f6029da6e-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316823,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":8},\"maxValues\":{\"id\":8},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-96e67376-3d15-4895-bd5d-5e0a325bcb83-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317316823,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":9},\"maxValues\":{\"id\":9},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000002.json b/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000002.json new file mode 100644 index 0000000000..86e3c7470c --- /dev/null +++ b/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000002.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317326453,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":1,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"ac59851b-981e-4e82-96ea-36a543cfe254"}} +{"add":{"path":"part-00001-6491d41d-d498-4a89-a291-92d964035606-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326374,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":10},\"maxValues\":{\"id\":10},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-d359921a-3cb1-454d-8aa0-ac5c830fcdc5-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326373,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":11},\"maxValues\":{\"id\":11},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-b2aae64d-1fab-4106-bc87-2454e945dada-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326373,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":12},\"maxValues\":{\"id\":12},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-cca989ea-d56e-4e1e-a4ba-538ef7801997-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326373,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":13},\"maxValues\":{\"id\":13},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-14706643-f3f0-4ba9-8282-7d55bb4ecacb-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317326374,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":14},\"maxValues\":{\"id\":14},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-25801ed5-1cf2-43fa-bbdb-8898fc102e64-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326373,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":15},\"maxValues\":{\"id\":15},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-03fbeb6f-b8a5-448e-afa7-0f49fca61866-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326374,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":16},\"maxValues\":{\"id\":16},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-08361c76-870e-4ddf-9153-f67852849ec3-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326374,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":17},\"maxValues\":{\"id\":17},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-368e738e-0673-4e76-a1ff-5ba9c755396e-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326373,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":18},\"maxValues\":{\"id\":18},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-2c9a3837-e2c5-42bd-b888-f3205f4b894c-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317326374,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":19},\"maxValues\":{\"id\":19},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000003.json b/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000003.json new file mode 100644 index 0000000000..14665bcd27 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000003.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317330682,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":2,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"5e6fdfee-2a82-40d1-bc83-47b3f0f3f21e"}} +{"add":{"path":"part-00001-b7be8377-b715-4234-b316-201fd2c9c142-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330578,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":20},\"maxValues\":{\"id\":20},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-719b1086-b19c-45d1-8c4c-c11db02e2e0b-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330581,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":21},\"maxValues\":{\"id\":21},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-eb0fd03c-3e47-42ed-9897-e79dd1567fb1-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330577,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":22},\"maxValues\":{\"id\":22},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-48b99dee-6d3e-4cfb-b651-4769de7f5b24-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330577,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":23},\"maxValues\":{\"id\":23},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-b873a231-8352-4bac-b6f1-b53ee738d212-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330582,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":24},\"maxValues\":{\"id\":24},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-74ed051c-b116-4947-b62c-2086bcd5bb90-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330577,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":25},\"maxValues\":{\"id\":25},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-680d9e49-afce-4a7d-bca8-b03438c2fd74-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330577,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":26},\"maxValues\":{\"id\":26},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-81c10052-aacc-4ecf-b9cf-64f81b3bd435-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330577,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":27},\"maxValues\":{\"id\":27},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-cf9cac69-932c-43bf-8e4b-fd059d519c0f-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317330577,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":28},\"maxValues\":{\"id\":28},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-43e3252c-8ac7-4c7a-bcb4-15aaf7ae95b9-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317330581,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":29},\"maxValues\":{\"id\":29},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000004.json b/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000004.json new file mode 100644 index 0000000000..ed86e283d5 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000004.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317333588,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":3,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"57611e9e-5312-4502-a3ad-c0c78799773e"}} +{"add":{"path":"part-00001-5c92b4bb-af84-4066-8aeb-1e493b7147df-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333524,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":30},\"maxValues\":{\"id\":30},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-b050a084-ab25-420f-bb7b-50eb95d25e4e-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333551,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":31},\"maxValues\":{\"id\":31},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-1880b504-fcf5-4f43-92d4-c43e8dd9d7d9-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333550,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":32},\"maxValues\":{\"id\":32},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-4af9173b-8472-41cd-8772-e2bdb084c5d5-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317333524,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":33},\"maxValues\":{\"id\":33},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-89cec234-f844-4802-a786-5d9133bbe489-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333523,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":34},\"maxValues\":{\"id\":34},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-fa11b282-ec0d-4513-9baf-2b84c5f94a12-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333550,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":35},\"maxValues\":{\"id\":35},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-895723cb-0dba-4019-a2a9-e6db9a937c91-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333550,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":36},\"maxValues\":{\"id\":36},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-dca57e7a-f859-4b39-bc43-03e1061f1b4e-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333551,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":37},\"maxValues\":{\"id\":37},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-96756753-7714-4c07-a238-d5b57f42a8ce-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333523,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":38},\"maxValues\":{\"id\":38},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-9a7b861f-5d9a-41c4-b4ec-7f0d1391acfe-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317333551,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":39},\"maxValues\":{\"id\":39},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000005.json b/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000005.json new file mode 100644 index 0000000000..f77fd655fe --- /dev/null +++ b/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000005.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317336099,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":4,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4819"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"203edd74-d2b2-46fe-935a-6222cfb888d4"}} +{"add":{"path":"part-00001-a24fe71c-ba35-47bb-8f3e-636d5991d5ae-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336058,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":40},\"maxValues\":{\"id\":40},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-a56ad50f-bc64-44cb-bb55-e2d177947b3d-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336079,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":41},\"maxValues\":{\"id\":41},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-95c53cf1-b472-4c34-b728-1dd7cbed8b2f-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336074,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":42},\"maxValues\":{\"id\":42},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-7e442ced-e810-44d9-9d28-3027e652a0ec-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336080,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":43},\"maxValues\":{\"id\":43},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-68248457-2fa3-407e-9de3-759b1e052b99-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336075,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":44},\"maxValues\":{\"id\":44},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-89a48ead-5bf3-4d16-aada-97c11386fcaf-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336076,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":45},\"maxValues\":{\"id\":45},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-42b618de-c46c-4888-9b48-b99493ec2983-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336070,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":46},\"maxValues\":{\"id\":46},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-4dc49305-f4f8-4ec9-9a40-8f4b3bd81324-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336055,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":47},\"maxValues\":{\"id\":47},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-d7bb17a9-223e-474b-9d78-2c745cc35a4b-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336054,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":48},\"maxValues\":{\"id\":48},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-68a79bb6-a31e-49bf-848f-2d64ceb834c0-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317336079,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":49},\"maxValues\":{\"id\":49},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000006.json b/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000006.json new file mode 100644 index 0000000000..28116ba9f2 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000006.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317338700,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":5,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"9ee4b86b-1bbe-4d6e-adbd-8dd4961989fb"}} +{"add":{"path":"part-00001-a8fc5b00-29e4-4a99-961d-b0cbcc23d165-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338678,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":50},\"maxValues\":{\"id\":50},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-5a3079f8-abbc-4b5f-a1e3-340830e59222-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317338670,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":51},\"maxValues\":{\"id\":51},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-567d7b20-b6ce-4e96-b500-caa34c80f8a7-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338676,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":52},\"maxValues\":{\"id\":52},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-b794df4b-174f-468a-9de7-2aa865ba7014-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338676,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":53},\"maxValues\":{\"id\":53},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-a15406cf-c141-4f7b-b302-e4b5a145cad5-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338675,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":54},\"maxValues\":{\"id\":54},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-8a52349c-d93b-4c59-b493-13486bb5e284-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338680,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":55},\"maxValues\":{\"id\":55},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-1734b4eb-4414-4b3a-8e99-1bd099c9e6b5-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338670,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":56},\"maxValues\":{\"id\":56},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-02781f2a-6c34-42ca-80a4-e830b2eeb963-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338676,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":57},\"maxValues\":{\"id\":57},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-63ec1c21-c31c-43d4-b5c9-9c206aeeb280-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338670,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":58},\"maxValues\":{\"id\":58},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-b732e8e4-7d1b-470d-89a5-86a3f8d8bdc2-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317338680,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":59},\"maxValues\":{\"id\":59},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000007.json b/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000007.json new file mode 100644 index 0000000000..956c8b508c --- /dev/null +++ b/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000007.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317339658,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":6,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4819"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"63c885b0-74eb-4075-a02a-a43b8202b3f8"}} +{"add":{"path":"part-00001-f697bc51-b77f-4234-938a-5f85478cedec-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339621,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":60},\"maxValues\":{\"id\":60},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-839f210e-cf84-4c5c-b185-fd2fe2b5ee6f-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339491,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":61},\"maxValues\":{\"id\":61},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-9ebe1c22-87a1-4f37-a695-77658c3e70a8-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339491,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":62},\"maxValues\":{\"id\":62},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-01b2687c-45e4-484c-b1d6-80e06b5b5d11-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339490,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":63},\"maxValues\":{\"id\":63},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-05662cc4-6a79-4204-aec1-2311a44d8c74-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339511,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":64},\"maxValues\":{\"id\":64},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-67f1b806-ef5f-4f8a-890b-b3b5ad1d234c-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339490,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":65},\"maxValues\":{\"id\":65},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-37318455-4128-4e1e-9ab7-5c587ac9fde0-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339621,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":66},\"maxValues\":{\"id\":66},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-c5d68934-1f5a-40c4-b5be-1233eb15378a-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339511,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":67},\"maxValues\":{\"id\":67},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-41e40903-13b6-4465-aa3c-bd8cb5e52b18-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339621,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":68},\"maxValues\":{\"id\":68},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-110c626e-ea13-4204-8cae-a3183d89a4b7-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317339621,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":69},\"maxValues\":{\"id\":69},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000008.json b/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000008.json new file mode 100644 index 0000000000..ea216c1556 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000008.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317340794,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":7,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"b5902ae9-2aab-46f9-82f9-d68ae45940d7"}} +{"add":{"path":"part-00001-3b62f1d4-2a3e-4611-a55b-e9d2ace11b3c-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340752,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":70},\"maxValues\":{\"id\":70},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-9c066923-23f4-45f6-b2af-5a4ecbef1707-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340776,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":71},\"maxValues\":{\"id\":71},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-cfb48b6e-0fc7-4d6b-8ab7-c52f29f71b94-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340752,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":72},\"maxValues\":{\"id\":72},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-b21a45a5-9a53-4dfa-8327-8a82b6b283e9-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317340766,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":73},\"maxValues\":{\"id\":73},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-afa450c4-f649-4c88-817a-6d0bdfc4da6f-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340765,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":74},\"maxValues\":{\"id\":74},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-dd96cd25-394d-4873-84e7-f2f6b0eb5a67-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340776,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":75},\"maxValues\":{\"id\":75},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-3a98b7ed-8665-4bc5-8704-6745f7084cd0-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340776,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":76},\"maxValues\":{\"id\":76},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-8dfd3dc5-cf31-42fc-8c55-2ac70ce9e18d-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340782,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":77},\"maxValues\":{\"id\":77},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-97e7d287-aee3-445d-a90e-f3b2ef4bd7cd-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340765,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":78},\"maxValues\":{\"id\":78},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-db2340aa-28ff-4826-b39e-07ba516551e9-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317340779,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":79},\"maxValues\":{\"id\":79},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000009.json b/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000009.json new file mode 100644 index 0000000000..1f9d5fb05b --- /dev/null +++ b/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000009.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317341714,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":8,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4819"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"5379512e-d4d3-42b5-817d-70ecf05f2385"}} +{"add":{"path":"part-00001-4e7175fd-6ffb-4b6a-946c-43aa7c439104-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341702,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":80},\"maxValues\":{\"id\":80},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-f3cc4f07-93ec-4a47-add1-b16c1149c3d9-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341667,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":81},\"maxValues\":{\"id\":81},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-07581f2d-ee98-4464-a28b-f738e88749e4-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341702,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":82},\"maxValues\":{\"id\":82},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-c9d7468d-fc14-445c-8dbd-65d616f8eb05-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341666,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":83},\"maxValues\":{\"id\":83},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-56eb2260-fb49-4138-a5c8-f0ae0949f4e2-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341670,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":84},\"maxValues\":{\"id\":84},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-5d15eeca-0fca-4986-a18e-4d86bf5ba2f6-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341670,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":85},\"maxValues\":{\"id\":85},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-3e13d733-f55d-42ac-be4f-f4400e999c29-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341702,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":86},\"maxValues\":{\"id\":86},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-22c23f1e-26d2-488e-8e07-2de6ae5fded5-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341667,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":87},\"maxValues\":{\"id\":87},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-d0debf09-5f82-4c61-8636-27e51fba37e5-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341674,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":88},\"maxValues\":{\"id\":88},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-7403e0c4-bb07-4a1b-9fca-a01523713f85-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317341702,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":89},\"maxValues\":{\"id\":89},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000010.checkpoint.parquet b/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000010.checkpoint.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9df253f2f762891c53763e62e57576217a8b289a GIT binary patch literal 21615 zcmeHve{@vWedn9eNJxOjGSQn1ss0cYt8$MXeZSv*J~}DCYRg2%@)&#F)J}Zgd-s8C z{i-xzpbN&@WK1Zf3Lz{>Xcs&FL5b`75Z6szH)~Rs^*WB%OX8GzecHfw-NyBYj6pooH)6&Hv<{p$ON=68rMrSQ2) zY!-w$`36A{UP=i+lL1qsVriLp{0A?WQk6tru+9`fOL!!eZa8>2pI(Xf7#%N4vMejI z6$9Zxb1&s0pGFh|C){f0z+svEMa zOTMYw5>`PZqF7WiJ=yVmSNEvtOWSq}`z?u3L)CQKl{_09fR-&ex~)lyrW4a~45kuY z+H!-iob2#zm%64c*_!G~y5{&2VY(yvo^05jq1dkMPrj476RTyGVi`Ud^L=1CCY5a6 zOx@B+spaaLt;mz0E$fo3II2l3pcxDc!crwi@l1)xOg0_EF+4@#Lv@Mykc3QJNpmb8 z64O1&aWzx&D76(=wsee^wsfQx9qv{PT{c}`f{d2m-)A|Jqsb5^MomvLRFGOQMbnBT;2vavab%Waik0r(%t4(oN}xL~NJ25WD0PebM25%%v7FCBw57 zO>-Qk6MHB0?dVihHONEp2uOn~mZ%Xhbum(Wgc^}mMKWX z)g04dmSkHnUftK>X*Qgg`BYOK%TPSKM4RrkB-8R0hsY+rF)szB$>E;-SHF;q0;@`MRGiG}D_2)n$qJSia*CMN=nf z(|-7Xi6Ce@4q!AyJ>BNMuM=NVJyUm?Ll7ysPfuZ)h)uo))3@Q82*wQDY7Q)d5=GZN zo!aK)+J^nGFdNeLR2VkQ3G;Nrlc09MW^BTAS=_G|`8Q9rwLZa=kn+(G1aWKGwItNNaWn3uYt z8=+p;U^v9Kvwt|Z9d<+u-ACWvayW$$uNp*F7;=c>`k<;oehxB=M2JnGEm*7>g!pzSMKj4MuN{_XT3UAQ zTzXpfeb_IxUCS^u+bngYIyA{f%G2Caf8LCQb+mgGG-M$oDjrnkFb@kyY#^YgTe4}w zhkVVKv-6r)K|Csf`*?=oz@3mz_FTy9P%WbPDzcM{h^VWGtwg2>`B(%j0kPB6CJ(28 zCmW`zx=3p#tT8*%vJ9aXQA$GagMsKCad#e1uSJkz)Ic_dK3&y>4H^(i>eX&&)pZpc z$-$a@yQvrUVpGc@@NFJ>X||;`qf3hx^hlO3dkWGNb^X%e)RZn+Ob1`6Fc=SwpadE) zT?vWBVy2-|nZaJ*tP*myNo=e`@eOd8+TCk7Oj1-$V-8%^B6J%v07(0arueQ!Bv++t zn;G#=J&1&;sw$Rj*_K5ncQs}^uFCt4P8^dRJ=B4i=b6x2_UqYh*HC@c_ASM+cegg= zb%p7&EoXPM-VGnsv0_9T4S4_}K?*liMb!QfyY6-)=Kdc2eiIT$`CfJSR{wLr??;LW8Tq z3>nf3Wds>U^(;zDZYpm{K2;QF`Clw!E($c=P+&$5j9hiHpSfzjZ|ijDWAmmE3|(7w zbjN|mA(bMPBY;q(X)F;HiUphUO4nw35bibGR}};dxABIIYh;JAI&1{%@KHD+L7K=X z%V2Q|Ok750q==*luER?l8|yNW36$(tnlrjq`fj66Bm^N#bz$xBx1(+C7F2A>5AEuN z$rw7qo`clQZAMXK*Hlf0h^A^72BHc| zHUxbSxk{y!nD8b`Rwi2;xV@+<>Hr@xRW-d+W*cD;-qFn+9z&8v1$A`mmiyD)usKim znB&WKc4+?5kM)^II3}teSf8Qz4%I9M8KXS0pnJqlOVX4F{-WR2d?a{JmSh)}LVXvO zIXRXYKwOhGq#Tq92#^RsI`UNZyH_J`$iAMPx~dJOg=xZKvcJ}{jM~JpVEgF7NTlZM z;Jgm_prsqWmEE^M@JiQYS7{QePn3eNI*ow=U0slF*KLGi%l144MYiuuem%P%86GvM z;^~SfdB{wts0c4ZPh zDU|vy?8H_zgaRK%WB5qTDvZmZh%yfI6-zE1Pj`SCGKNC3UuqHb?C&qgzBISjhJ#6t zZNs`yli8&7t@+)se+_ZQheT6)cFlrSNK#k?(!Gh)jY3TGO8;eU8;S$VK-!#-z`z!{ z&AO?dIwqqcM%Bfz?Lb0CuH)WrBM({{GPmYIeW=6Wnn*kblToc0j_Z`*oyt+FYbrw~ zLWD$_sash&hav*?qDfI^BCvWc5;FZt)>_c+BL_1B3hHxQEWp&@RGQ_RvWm>R^Lz`W zVo*)CY@eVYbzu4q>L02g*J{XYn#>GaRrl-`c0jDy9B^BNXmym*FjfR6)rE##SAz^4 z#MM01ui2KL?Vr2+cpr)}lzvd3ixSmzZQJ*=?d_B1oNnyI9CXQ&?__@{W)v-T12!O% zFHCc7xV-S!lf*^;ay)4^DWuWq72q%tR`U(Os z_6zW;?5%A&ycx+Abu7Y~!i$9tc(BYgPcI>oH;J9FF^|D^y+v-1i+zz}qR7=P^VG*P z$Q7Hj+emuKl05i;i@k!RA%vhruryR!2pv9ZHV4InL3!zRH{{?ss9j|gSt>!)(g{)> ziq>pyL8n6v#WW0sAzLH+Dv$tnw2+1CFM0<7@2J=VIp`X)jt5omypZXESWycYjx0;+ z1OGIDG7a(1gpWG1>$r$fs9sCQ<_Ox+IYo+k!9#+hFj17oD2PzRsVG30jUp~9e+03p z9ccsm!lQ@4DKZjv0$4A$jVh{U$Ay+18&wQ<4X^r8bD^k^N(LyUr9LC z*p})R8kaZs3Js}t0bo;2;x`+_CtrW?$Bn(B!?AZXESdPv`r$7ESbx5yIrHEDrlnbM zTbncOPqegUPJbI+cfEiv(kf>@e-K?wFX5BZN-}?UPfMGiwRQ_jTbqU7XdMtff}i)d zZWs2o_MtDIx$2pgWx`17m@tk(U&R0OTRVjf_@Zm7MHb#@J(T(1e-4Q6x9$)=g(=%w zW#Jn9KNs!$TE8LuE!u6Zd10WH2zy#L3Eyk&%>1`+x7;nf2|DWmcMhFRt;>Y>THA!} zXwPfS_ul(LxEAyI@FxWIjgdrI2Qz@#ZUs={Ue--WMd$_0n3*@Ty z8^5!v<%_59_~^dD|9WiUUHdxsKe+WD=J?CIuXFGC{EES)3zW_SZ3hm0>CwK&9{ZOs z?RfCyQ@{V-)c^RWbl$2x;dpIGtFm3qDfI{v-IBO8Vb%pD!~CKg|(^ei48 z9J!qiZD9EHdy5CX#n;(nD|+ds+po_FOGY*f4XqsAFj8FH+S4p=-Ma5Kq1jNkuD(rZ z%)N1&kTK0Qy!qyBLg8@KrD5!))tJK= z;pRP$Hx$+iDRIrM!j~UyD4a(>r}St;G|4VJo=OFLXiCh-T8!f0c9}THL~ICJ;X?GS zQ8*W(2zt3MV_9o%4fz2Rk7UX)Uu_IqW(7={h+3WunD;{gfN+_JQYeoj@wa*)C@kZs zz2bk(4HmlZp*e-Vxxq5g6s*FJK^r5oM)QiFMxaWe5OGclH#LO}CF=^;%4PEtEP(&y z5L^mMz%=i%KLOg+B?kfF@*E7L8t5Qx;9? zhbwN0L}$lKwE}K97iL*!&_y38an#4sx1Y^qw)VVvt5CS90jqyZ`0|g_h1G!M><&cB zH5c_>|KoI^WlUZJ=K17vslq#RLWw<|0{lbKqKv}9XyGIf7)N?9n)7(7a4sFsQ5L&QW|IwjMQ(g zcpS6%H;07V`!k>X(wr|Jf*JF^Ljqh%&{KN@P4gi?jl^*)A1#pVWb=wckr8NxvN>1~ zc-hPBLVv`mQuxZ8YVPc^V^+9v`7jUo? zz-BvWDN3H_PJZog=T$;|^o}xL4S<>hzM|_cpz;c4`@j6%v1CGZ1;bf@B?UaxH~K_M zd^shSo_%{kB{zdVD09Q5^xHvNE^Z!Ui?3TeQ5@gk78my{_6OMzPtm`Fj7^96S4hr1 zi(7x~;NVD+tz+Z(z_T-ThVUh3F=TC4#8G;YB9(95de_!yZ9n+V$vw&SWOecXE{X&xDjd*4l(&)gIDo=uw1-W&IxzgIj^7U2G2adpJR>7@C@i0DMUyGLVL zzL_+SkBQsU(TL;Y@rZ{fVjY~Hh3`~314-T=LqAdYe z$A@DNzpzRIvZhe2LU%i6%Qo?Th zxj#&a?YZx77CVKm)g5A{Vs1GdB(qmn&weGC{a!G87cOQhr&s2W1Y%dJ8si^dahPgLt*^7CA^N49a*?(Dx5QnfyJ_E%%=53o)ISZ@SaK@1>VpH-DjWknON zxi8n>^+pZklL6$90>~pb#eO*xWK}VgpdPuYn$cqc>Yn@gqAvh79V%lc)ToY1&@R-V zy%vfvKznh`CDAGiu>|tPHPsBB2_+do?z=e-X)`f6ucDGmAotx|4O!a575$?C^7LgO zD@wrx@^lU4fdF#P_7s=nlLIlEfdePiV*>HyKs8rK0*E6a#JMVnixr#+;JF&W7XrW& zA>i>_Vv7lQnxRHbu#VqS&D`IIA`7wh6sBV}1_f<6efMWmxieRH?J4lQaP#F4w=Q`y z6<$C`*VDL!&d&ny(x+v?jdlW98A#s667#t|52S*GG3h01j0*IMjK z_0+!!jNljm%IP-(s4e6}nQ^pZHE0h9Xs?85C6>g=2(2bma+ngUX6hRO%$X48g<6;y zHw_72FVq0O8~{$?*X@s|NP50CwuLVc1xIIoKKF zI!AtTeKo_+1;|f@$Y+3D4(u~CJT10!#4|OBCj!J*Lc}A3mqVknA(}To$$_&U4sj?y2T>?-v6X4`I(Hu-%bZt3y3U zIa`B*+b&qsD_7BgnwqU;=uI~EWYK*BC&;Ja> z^Zbs$9!}2$QMtoUkWSYieP$n5-VZ{gCq`msNG#~4@Khi^4G7Ubm2^73uR?$K(gJQW~62V}(ZH|s&J zjJX{9%^K|E0ru-5_HjIZtTD)GXBwf9K9kOEPj?+3>&^f6mELh&^mhD1gyyRJg=Dnp3CXq^W$u6l#(i#qsfKgL^kHiUYV#fx$gIQ42l& zY-J`(=KJ!8C#r#W{#)*q&xOF}>j17cCEiL%gD+ISH%(+=knGNp~PC389;^HxaaB*;VsW4xHE)4q!s)5(q1IzJl#AJ`oj z)6@6O06hIL@FKuZ-&YO1dw*a|A@I~D4jdlgWv8DJMrVVePw+C0M zwd`Et4PwQ$Ydr4*SXbp8?FYvRFLaan)z4clQu-y@sJH~a0+foaXo6CwjV3qE^+D7T zLE^jX`Y0sQy2xes2ZgJ>9Z!(z4dB7aW4+VskX?A67ZZpb_Y$6RWdXSCqVZjSOuNqL z_cMuFGd(Cs7^*u6{{NI+%8L4BEKMHNB&Jb!ME+$g4U67cd3hZHto+U;R?TceSnv-H zB7W+P$icr^D3|bcR|plaY+{b<1__Vf;`y^#arxm+JgIJ?(L0|QAg20rshI1qh{}gV zF}HOuHxA{|yQ3H>UN;>dr<>6oy*sMj8sPEK2i_G-d$?3f`R5|>q&%v4_!RTnBa4$sw(w5|YZ`qQo$C|=NqXgAZ3gMJRA>u}5ICrh=wT|&>{ z)ObqWzC5!#)eRH9eM|V5Wqo*AA>FlA$gf7g;m^dR;qlJs)TUEes%T4<&TePJ;E>np zIXZp=pd#a!FFN>Th77;EQN!;J!>`lxC{s-SJE8CkJ{)>2p5@_*_5b641bwL>4B%Id F{vT}G{2u@S literal 0 HcmV?d00001 diff --git a/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000010.json b/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000010.json new file mode 100644 index 0000000000..c1f8c8b095 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000010.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317342605,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":9,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"ba4b200d-e0ac-4715-ad4e-bed8ef1b20cd"}} +{"add":{"path":"part-00001-55fbea2e-0788-438a-a50c-65f809acc05b-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342581,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":90},\"maxValues\":{\"id\":90},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-0d891eaf-0e01-46a6-879e-49bbca90c215-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317342581,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":91},\"maxValues\":{\"id\":91},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-569d12e9-04d5-4fe2-9554-f288047f3386-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342592,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":92},\"maxValues\":{\"id\":92},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-1bc89ec9-8092-49e1-9b1f-123ae50e3d40-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342579,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":93},\"maxValues\":{\"id\":93},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-b4a223d6-1d87-49c9-84c9-a85eece61839-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342591,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":94},\"maxValues\":{\"id\":94},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-4c3b6be7-979c-4f42-8920-efa32b751d97-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342578,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":95},\"maxValues\":{\"id\":95},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-6c224359-8995-417a-8b24-b2e530327bc6-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342581,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":96},\"maxValues\":{\"id\":96},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-2a40eb21-34d2-48ca-aaa5-55db674f56de-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342590,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":97},\"maxValues\":{\"id\":97},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-ca8256ed-98cd-460d-8de2-9f6f7f388703-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342579,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":98},\"maxValues\":{\"id\":98},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-f131fc78-c201-4e8d-b194-222b2e79778d-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317342578,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":99},\"maxValues\":{\"id\":99},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000011.json b/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000011.json new file mode 100644 index 0000000000..05863f6cd0 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000011.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317349152,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":10,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"1657fc30-879d-4b0b-972a-4e3a079fdd7a"}} +{"add":{"path":"part-00001-ceaadd5e-615b-455d-8f4b-052b9c94c7b6-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349136,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":100},\"maxValues\":{\"id\":100},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-b7dba1e7-b1e5-4f02-a223-69ec7353ab45-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349123,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":101},\"maxValues\":{\"id\":101},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-f099cf4d-d418-4852-8580-091908847a66-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317349122,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":102},\"maxValues\":{\"id\":102},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-418e8d25-7316-442a-9bc8-616ed01231eb-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349136,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":103},\"maxValues\":{\"id\":103},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-7538a9c2-1ccb-4150-b162-ef8d826fe30f-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349123,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":104},\"maxValues\":{\"id\":104},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-7619f42c-5bc4-4e77-b037-f36481c8b63c-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349123,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":105},\"maxValues\":{\"id\":105},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-eb49d85f-91cc-4293-9339-a664ee905b0f-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349134,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":106},\"maxValues\":{\"id\":106},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-81e22719-7705-4703-b2dd-c4e2982217a7-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349122,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":107},\"maxValues\":{\"id\":107},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-883858d1-9df6-4b55-a2be-5b8387134617-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349122,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":108},\"maxValues\":{\"id\":108},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-07d91938-ac89-48cc-a657-6067d2d9f67e-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349137,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":109},\"maxValues\":{\"id\":109},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000012.json b/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000012.json new file mode 100644 index 0000000000..4cc44fa8e8 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000012.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317349950,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":11,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4818"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"9a035bdd-f892-4449-9c39-401f31fcada6"}} +{"add":{"path":"part-00001-f3b19100-b5b3-4e72-8658-7a937e9ed515-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349924,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":110},\"maxValues\":{\"id\":110},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-54f2324a-e97f-4def-9101-9cc10599ba06-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349919,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":111},\"maxValues\":{\"id\":111},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-3f7ca40a-6497-4208-8a1a-11062456a5a9-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349936,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":112},\"maxValues\":{\"id\":112},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-a14852b2-c743-4a4a-b9c1-0c9472c51699-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349929,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":113},\"maxValues\":{\"id\":113},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-97d06207-5584-43df-afc2-2d1738d79193-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349943,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":114},\"maxValues\":{\"id\":114},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-0d431f03-6dbf-40e7-96fc-b1ebbbe9fc65-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349922,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":115},\"maxValues\":{\"id\":115},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-af0f0232-33c8-4315-821b-8bb1323b7a26-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1742317349936,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":116},\"maxValues\":{\"id\":116},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-4f744428-d088-497e-afd3-0b374e453e7c-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349936,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":117},\"maxValues\":{\"id\":117},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-694064b8-137e-45cd-b2ea-e28af172a2dc-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349918,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":118},\"maxValues\":{\"id\":118},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-56073753-4c1c-4a68-9b4a-13ef5d1a75fb-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317349938,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":119},\"maxValues\":{\"id\":119},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000013.json b/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000013.json new file mode 100644 index 0000000000..b2d03d3ead --- /dev/null +++ b/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000000013.json @@ -0,0 +1,11 @@ +{"commitInfo":{"timestamp":1742317350712,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":12,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"11","numOutputRows":"10","numOutputBytes":"4819"},"engineInfo":"Apache-Spark/3.5.4 Delta-Lake/3.3.0","txnId":"c3cd0fa1-9c72-4344-8225-0b787e52d5e0"}} +{"add":{"path":"part-00001-7a0d95f8-e122-4cf6-b89c-389036a9b415-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350696,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":120},\"maxValues\":{\"id\":120},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00002-f1f035c1-bf0f-485c-950d-c81d0d2aa8a2-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350706,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":121},\"maxValues\":{\"id\":121},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00003-d7a51e45-70f3-4379-819b-341951abefff-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350691,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":122},\"maxValues\":{\"id\":122},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00004-4828722c-5799-4be1-ace1-14bd7f477dbf-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350691,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":123},\"maxValues\":{\"id\":123},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00005-f3c3c72e-5d71-4dc9-9e15-342f1d6cb6cc-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350701,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":124},\"maxValues\":{\"id\":124},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00007-e8d74ede-8876-4f55-8e9f-1bbde0d07a35-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350696,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":125},\"maxValues\":{\"id\":125},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00008-91052146-2292-45c3-b57e-1fd2dd6be6ed-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350692,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":126},\"maxValues\":{\"id\":126},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00009-9d203964-9f4c-4c84-ad77-9ba305bb6572-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350706,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":127},\"maxValues\":{\"id\":127},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00010-8362228b-acf6-4937-875b-26c013c342e1-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350690,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":128},\"maxValues\":{\"id\":128},\"nullCount\":{\"id\":0}}"}} +{"add":{"path":"part-00011-6f57658e-1953-4b59-b504-27c9e8c5cc3b-c000.snappy.parquet","partitionValues":{},"size":452,"modificationTime":1742317350677,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":129},\"maxValues\":{\"id\":129},\"nullCount\":{\"id\":0}}"}} diff --git a/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000009999.checkpoint.parquet b/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000009999.checkpoint.parquet new file mode 100644 index 0000000000000000000000000000000000000000..4d9437c57f0dbf93d505b3eeb380285c99f834f9 GIT binary patch literal 11644 zcmdT~eQaCR6@PYI=gUn~n)_;3ZV7>!u+_n7zB2CBd(OG{p6lKB@2Mvealq%*@jv`vg`3#n(N0Pv$tp=wk#xP6 z6{LcE|A%uwDl>5wQBGvl;qzkez;{=g-cnqf4``H@hNghl(&kkaP4PDR0?ppG#!jEN zp}s|FX;FL)YG}-*;rJ(xU-?y07Lf%Y@a?M?7JhAhn^yGGdRwT0 zu8zHNp_q+}8`=ieDcZsZf1|@YV0ra<7EQ+sy4=;De)vvl77ev}$}jV4nE&l(LW%q;ep+uICy$eN z|K9(q*&S)FvfKf7Z)t054c0d|d4mC8GvumP)f)&XKCjjkY}A?>HV2f=m`G9i7p8uB zgij#zX$xkQ{BLVW4LLXd@!<+K`78-rFehc^z+3B|b(2P(RqM)wI_n^mtEDuh~@78PdWSj9-J;R3i8af^bufK^BVYT9+1w4vah{ z5C#I^)F47NT~QTXfv`Pv=<67Q=!bo?M}E7CjWu%&)LI+J26FDt$6w(oh}qIARUGFD zcpuqEE`R63zxfz4k*v}>7>cTeb)~LTQ}mv=Ru=?S!m+N{lTl3tT^ivt_m`I@xEv9o z6c7?U-QBUcP8q656*>RzKVQyT7_1n{fQSUo{b!(;kB-^ZiZclW?eIDL#>dwd5$TW| zKxe4A%TO~;;^duwoc=GYrDXtNCrnKT%^wR{JY|5Pba!h}byqZe5T8QP14=xkKt=?B zys7L4U%FOXTuuqT#Ojjm()Av#E~js)n}8c&L1k=(C{w~#f0vUt88hfw!q$CL)AZT` zW4BmX$C3Y#(U6xMVE@zo3Zx@uxGM&&ZmymWDwQIS z1|6JY+QEB|t>nFvE4hsR3NB-&f)fm`;?3z*oOZa9)6P|Lg3;BynM^l_ZsENb)6M=h zymu>oc~NH!rUxpD>777w;ZlM|K)~*Vd6|8O0Gd z2^if;H|a2%BkOtdgY{&;(L1?;3^BfsZspb-tLDw5(F9jr*t2cT16rrj6V|`1#iLqy zZ*adBQHVoovO0_Or=qYyA&!lba|QVFc6Fn)(+xgIY|vADK#M0ru_!)So#kOAp?|d- zHi()^c@}94ZL&E_pU@6|;{YBHi7mxk z1U*cw0|c$0Z&?p2VoQX+rKNPLjKtJXX9&U*&>s#(pj!fpGTgUr-LFKuG}NsE5{gFU zYe790CuJ!~xQbfqh?yy04%r2!?O?TJ0PT}@ejt(uxY=-L{KL_+#L zHZRm%30=_>hR&dZJ+4J!2Q(sQPYs>N)yy_%J>ZRWXPXdgYIRm=PwB8L)zk+=VeKo% z*#HOm|D9eiT55W84Q9Fa{M3Abn!4bL1T{P9zbvoAUAow0aC(OGPjbjM={LaG*L6jo<(jIu1rOCdXC1SRU{NYF&GSC5CH zT?}4wR`S^28VmPCqA16{)1L}{TR4=^2^@V9pcrB_qD6H!D4Jw~aX9^HDvfN&BrAil zXlJOaCr;yiDS;)Zj1~r4=&@id3}sUq&XrK8+M_989Uf2^<=Fj5Jfx?)(ThMYAbc=& zHZ6Ei$oVA2x)Dw_7EwY`7J4jE+f&`lY-q|%GqE?A7>iTcK$T>>%k5fd_LR7Z!_`zG z*VX5opa6>`O_!%bPEj2&++MAvmwr+guXk- zy^?3#mMI}?LqMhNkaqwjNMp5tbPJ_r_nM_$L}`;$+5$+EnAgL2O#2Kf2R(NI{R#VHgvDdx=y$DUXr*!uZ4z&o`O=kH2>JYiGAA+Ly# zg9!OHg*;_Qen;{N$$}VX48bZxcsJZ-5=B*URn-lT=hc%u|A33w*&ryGq$f+#7%%p=TM3Uh+yQy!KAj45)^BTtCw^~H7o zN>ZSC05V5Tdhfu8GcU3aBm0p%(LCc&$~LnvwrAvEDn*(GB$gfxBr!vxGSebN-~#t@31CK08|oCePELxTrxA3Dg7(8rPobbf!1q@Ox=tbN zWePh3u)-{)S4&)1W<=&$WbXYUokF;?U=eDJ)D)!1J-A8`eb^7&Cn@(d<)%r?U>5RL z<{m;sI*v%^DAI7HXd3}ZFvxJFpw$V)nx$BC6f3p(7-|)=M9ztjzeC99H=&tES93G9 z=7wBsl^k6yD0%@=M<{9%P}wS(8&zB=lOozBL_0&#hHkkYTF$j{=oUf2XYT>X847t3 zAPv`AWY{^E%Zno9074%2q9yy+aL8I?%sE!5{xyP9rx0O~B1~l?Tz8S067kL=-dhx} zw~EhgEk8WNp_wKMDBc~lczUb+g%$D(qk;vbplgA`(9Eg*;ysYsr@iDPZ1e}<#|133 z`MBumpQpe9BAEFKn&tVsaiY%WVmH?cILdhu<>A=VQygn_2XV{w6}h^nEV% zHoMr&y%1J!`sj{n7OEP?D$XY@t5!wpQ>-pUXAzxY)+IQ@Rdj|6o6#A@*QN6!$1OKi zu_($;c_&c9ag^PIrCVU<9KGgI^xg;3<~QTv_ZmnO2a#F($q^@T$MCELSpE6#+48*u z74zKC<>NFDW8^-VjMo~-_aMg|aTh)gQ)C9@0I>Q^7ks8BXqmqFGxeKwIE|)!vypVb z4jNQ27V%nM8O*8((%I{fJAmAux;sq$55nk8V(Q5(VhTkpG?F^WGqQdW5gFA*E@EUo zh`@Wu)c+_V{bUv~iz3FFP{aoyf;j{$ZwjT#Y@gmQ?*~SI<|Sw569~_^0?mADR_5>) zQ077tc}Vh1ZpfibX7y84rc8#LtRV}q`kQUFrW;hJ>1x$1?)Vn)qLF6upyc`JRuq@Y z!xfpOQlmslO^XMRk8Twr_#|pQ*NhQ7R=p@pW@vG=v1$SB45A%vL9`^GvEz)O6f4DY zoTP{|jX0x-BOkv_wtGt8PZeAUZ*qD6p2?G_kz{+ zb@dH(_0_e3o={k=Rs)Uotqt|QHnl0x($J^{o7x(DYHN!x2tORIZ?4zy=Xh#BxeLnB SaL4!=|4RP#lC&Fscl|#;+nrYc literal 0 HcmV?d00001 diff --git a/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000009999.json b/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000009999.json new file mode 100644 index 0000000000..37d128c02c --- /dev/null +++ b/crates/core/tests/data_err_logs/table_i/_delta_log/00000000000000009999.json @@ -0,0 +1,4 @@ +{"commitInfo":{"timestamp":1742317197774,"engineInfo":"Kernel-3.4.0-SNAPSHOT/test-engine","operation":"CREATE TABLE","operationParameters":{"partitionBy":"[]"},"isBlindAppend":true,"txnId":"09e63766-07ed-4b57-a6e0-0bba30aa801e","operationMetrics":{}}} +{"metaData":{"id":"7998c165-cb06-408d-bba0-e5c4e532ba21","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"createdTime":1742317196814,"configuration":{"delta.feature.catalogowned":"supported"}}} +{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["catalogOwned"],"writerFeatures":["catalogOwned","appendOnly","invariants"]}} +{"add":{"path":"a735be79-daea-40b6-94f0-317a77a03df6-000.parquet","partitionValues":{},"size":1918,"modificationTime":1742317197691,"dataChange":true,"stats":"{\"numRecords\":600,\"minValues\":{\"id\":16},\"maxValues\":{\"id\":3251},\"nullCounts\":{\"id\":30}}"}} diff --git a/crates/core/tests/data_err_logs/table_i/_delta_log/_last_checkpoint b/crates/core/tests/data_err_logs/table_i/_delta_log/_last_checkpoint new file mode 100644 index 0000000000..604c2b31a2 --- /dev/null +++ b/crates/core/tests/data_err_logs/table_i/_delta_log/_last_checkpoint @@ -0,0 +1 @@ +{"version":10,"size":102,"sizeInBytes":21615,"numOfAddFiles":100,"checkpointSchema":{"type":"struct","fields":[{"name":"txn","type":{"type":"struct","fields":[{"name":"appId","type":"string","nullable":true,"metadata":{}},{"name":"version","type":"long","nullable":true,"metadata":{}},{"name":"lastUpdated","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"add","type":{"type":"struct","fields":[{"name":"path","type":"string","nullable":true,"metadata":{}},{"name":"partitionValues","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"size","type":"long","nullable":true,"metadata":{}},{"name":"modificationTime","type":"long","nullable":true,"metadata":{}},{"name":"dataChange","type":"boolean","nullable":true,"metadata":{}},{"name":"tags","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"deletionVector","type":{"type":"struct","fields":[{"name":"storageType","type":"string","nullable":true,"metadata":{}},{"name":"pathOrInlineDv","type":"string","nullable":true,"metadata":{}},{"name":"offset","type":"integer","nullable":true,"metadata":{}},{"name":"sizeInBytes","type":"integer","nullable":true,"metadata":{}},{"name":"cardinality","type":"long","nullable":true,"metadata":{}},{"name":"maxRowIndex","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"baseRowId","type":"long","nullable":true,"metadata":{}},{"name":"defaultRowCommitVersion","type":"long","nullable":true,"metadata":{}},{"name":"clusteringProvider","type":"string","nullable":true,"metadata":{}},{"name":"stats","type":"string","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"remove","type":{"type":"struct","fields":[{"name":"path","type":"string","nullable":true,"metadata":{}},{"name":"deletionTimestamp","type":"long","nullable":true,"metadata":{}},{"name":"dataChange","type":"boolean","nullable":true,"metadata":{}},{"name":"extendedFileMetadata","type":"boolean","nullable":true,"metadata":{}},{"name":"partitionValues","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"size","type":"long","nullable":true,"metadata":{}},{"name":"deletionVector","type":{"type":"struct","fields":[{"name":"storageType","type":"string","nullable":true,"metadata":{}},{"name":"pathOrInlineDv","type":"string","nullable":true,"metadata":{}},{"name":"offset","type":"integer","nullable":true,"metadata":{}},{"name":"sizeInBytes","type":"integer","nullable":true,"metadata":{}},{"name":"cardinality","type":"long","nullable":true,"metadata":{}},{"name":"maxRowIndex","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"baseRowId","type":"long","nullable":true,"metadata":{}},{"name":"defaultRowCommitVersion","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"metaData","type":{"type":"struct","fields":[{"name":"id","type":"string","nullable":true,"metadata":{}},{"name":"name","type":"string","nullable":true,"metadata":{}},{"name":"description","type":"string","nullable":true,"metadata":{}},{"name":"format","type":{"type":"struct","fields":[{"name":"provider","type":"string","nullable":true,"metadata":{}},{"name":"options","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"schemaString","type":"string","nullable":true,"metadata":{}},{"name":"partitionColumns","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}},{"name":"configuration","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"createdTime","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"protocol","type":{"type":"struct","fields":[{"name":"minReaderVersion","type":"integer","nullable":true,"metadata":{}},{"name":"minWriterVersion","type":"integer","nullable":true,"metadata":{}},{"name":"readerFeatures","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}},{"name":"writerFeatures","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"domainMetadata","type":{"type":"struct","fields":[{"name":"domain","type":"string","nullable":true,"metadata":{}},{"name":"configuration","type":"string","nullable":true,"metadata":{}},{"name":"removed","type":"boolean","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"checksum":"94a578f92841fa7ba9cdee96b5905fdb"} diff --git a/crates/core/tests/exotic_tables.rs b/crates/core/tests/exotic_tables.rs new file mode 100644 index 0000000000..72b4375820 --- /dev/null +++ b/crates/core/tests/exotic_tables.rs @@ -0,0 +1,30 @@ +#[tokio::test] +async fn test_exotic_tables() { + let dir = env!("CARGO_MANIFEST_DIR"); + let data_path = std::path::Path::new(dir).join("tests/data/exotic_logs"); + let full = data_path.canonicalize().unwrap(); + + let cases = vec![ + // ("table_a", false), + // ("table_b", false), + // ("table_c", true), + // ("table_d", true), + // ("table_e", true), + // ("table_f", true), + ("table_g", false), + // ("table_h", true), + // ("table_i", true), + ]; + + for (name, should_error) in cases { + let table_path = full.join(name); + println!("{table_path:?}"); + let table = deltalake_core::open_table(&table_path.to_string_lossy()).await; + println!("table: {:?}", table); + if should_error { + assert!(table.is_err()); + } else { + assert!(table.is_ok()); + } + } +} diff --git a/python/src/schema.rs b/python/src/schema.rs index b93fdfc219..5824ba71b9 100644 --- a/python/src/schema.rs +++ b/python/src/schema.rs @@ -8,7 +8,7 @@ use deltalake::arrow::error::ArrowError; use deltalake::arrow::pyarrow::PyArrowType; use deltalake::kernel::{ ArrayType as DeltaArrayType, DataType, MapType as DeltaMapType, MetadataValue, - PrimitiveType as DeltaPrimitve, StructField, StructType as DeltaStructType, StructTypeExt, + PrimitiveType as DeltaPrimitive, StructField, StructType as DeltaStructType, StructTypeExt, }; use pyo3::exceptions::{PyException, PyNotImplementedError, PyTypeError, PyValueError}; use pyo3::{prelude::*, IntoPyObjectExt}; @@ -67,7 +67,7 @@ fn python_type_to_schema(ob: &Bound<'_, PyAny>) -> PyResult { #[pyclass(module = "deltalake._internal")] #[derive(Clone)] pub struct PrimitiveType { - inner_type: DeltaPrimitve, + inner_type: DeltaPrimitive, } impl TryFrom for PrimitiveType { @@ -85,7 +85,7 @@ impl PrimitiveType { #[new] #[pyo3(signature = (data_type))] fn new(data_type: String) -> PyResult { - let data_type: DeltaPrimitve = + let data_type: DeltaPrimitive = serde_json::from_str(&format!("\"{data_type}\"")).map_err(|_| { if data_type.starts_with("decimal") { PyValueError::new_err(format!( @@ -441,12 +441,7 @@ impl Field { match v { serde_json::Value::Number(n) => n.as_i64().map_or_else( || MetadataValue::String(v.to_string()), - |i| { - i32::try_from(i) - .ok() - .map(MetadataValue::Number) - .unwrap_or_else(|| MetadataValue::String(v.to_string())) - }, + |i| MetadataValue::Number(i), ), serde_json::Value::String(s) => MetadataValue::String(s.to_string()), other => MetadataValue::String(other.to_string()), From 6c7170564564d40702940420093ed1d63145d4eb Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Tue, 8 Apr 2025 20:59:12 +0200 Subject: [PATCH 16/23] fix: update to latest kernel state Signed-off-by: Robert Pack --- .gitignore | 4 +- Cargo.toml | 2 +- crates/core/src/kernel/mod.rs | 6 +- crates/core/src/kernel/models/actions.rs | 82 +++++++------ crates/core/src/kernel/snapshot/log_data.rs | 6 +- .../core/src/kernel/snapshot/log_segment.rs | 8 +- crates/core/src/kernel/snapshot/mod.rs | 1 - crates/core/src/kernel/snapshot/parse.rs | 6 +- crates/core/src/operations/add_feature.rs | 14 +-- crates/core/src/operations/cdc.rs | 6 +- crates/core/src/operations/constraints.rs | 4 +- crates/core/src/operations/transaction/mod.rs | 12 +- .../src/operations/transaction/protocol.rs | 116 +++++++++--------- crates/core/src/protocol/parquet_read/mod.rs | 6 +- .../core/src/test_utils/factories/actions.rs | 6 +- crates/core/tests/exotic_tables.rs | 22 ++-- 16 files changed, 153 insertions(+), 148 deletions(-) diff --git a/.gitignore b/.gitignore index 18dcc39f69..f23e3b772d 100644 --- a/.gitignore +++ b/.gitignore @@ -32,4 +32,6 @@ Cargo.lock justfile site -__pycache__ \ No newline at end of file +__pycache__ +.zed +.zed/ diff --git a/Cargo.toml b/Cargo.toml index ba562f3d07..003d054923 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,7 +27,7 @@ debug = "line-tables-only" [workspace.dependencies] # delta_kernel = { version = "0.8.0", features = ["arrow_54", "default-engine-rustls"] } -delta_kernel = { path = "../delta-kernel-rs/kernel", features = [ +delta_kernel = { git = "https://github.com/hntd187/delta-kernel-rs", rev = "00d1d9c8169c5cd7901682be2024687f79e101e3", features = [ "arrow_54", "developer-visibility", "default-engine-rustls", diff --git a/crates/core/src/kernel/mod.rs b/crates/core/src/kernel/mod.rs index 44a09d7745..efac04489d 100644 --- a/crates/core/src/kernel/mod.rs +++ b/crates/core/src/kernel/mod.rs @@ -2,7 +2,7 @@ //! //! The Kernel module contains all the logic for reading and processing the Delta Lake transaction log. -use delta_kernel::engine::arrow_expression::ArrowExpressionHandler; +use delta_kernel::engine::arrow_expression::ArrowEvaluationHandler; use std::{any::Any, sync::LazyLock}; pub mod arrow; @@ -25,5 +25,5 @@ pub trait DataCheck { fn as_any(&self) -> &dyn Any; } -static ARROW_HANDLER: LazyLock = - LazyLock::new(|| ArrowExpressionHandler {}); +static ARROW_HANDLER: LazyLock = + LazyLock::new(|| ArrowEvaluationHandler {}); diff --git a/crates/core/src/kernel/models/actions.rs b/crates/core/src/kernel/models/actions.rs index 643edfe4d5..dc4259f0cb 100644 --- a/crates/core/src/kernel/models/actions.rs +++ b/crates/core/src/kernel/models/actions.rs @@ -3,7 +3,7 @@ use std::fmt::{self, Display}; use std::str::FromStr; use delta_kernel::schema::{DataType, StructField}; -use delta_kernel::table_features::{ReaderFeatures, WriterFeatures}; +use delta_kernel::table_features::{ReaderFeature, WriterFeature}; use maplit::hashset; use serde::{Deserialize, Serialize}; use tracing::warn; @@ -149,11 +149,11 @@ pub struct Protocol { /// A collection of features that a client must implement in order to correctly /// read this table (exist only when minReaderVersion is set to 3) #[serde(skip_serializing_if = "Option::is_none")] - pub reader_features: Option>, + pub reader_features: Option>, /// A collection of features that a client must implement in order to correctly /// write this table (exist only when minWriterVersion is set to 7) #[serde(skip_serializing_if = "Option::is_none")] - pub writer_features: Option>, + pub writer_features: Option>, } impl Protocol { @@ -170,7 +170,7 @@ impl Protocol { /// Append the reader features in the protocol action, automatically bumps min_reader_version pub fn append_reader_features( mut self, - reader_features: impl IntoIterator>, + reader_features: impl IntoIterator>, ) -> Self { let all_reader_features = reader_features .into_iter() @@ -192,7 +192,7 @@ impl Protocol { /// Append the writer features in the protocol action, automatically bumps min_writer_version pub fn append_writer_features( mut self, - writer_features: impl IntoIterator>, + writer_features: impl IntoIterator>, ) -> Self { let all_writer_features = writer_features .into_iter() @@ -235,27 +235,27 @@ impl Protocol { }) .filter_map(|(key, value)| match key.as_str() { "delta.enableChangeDataFeed" if parse_bool(value) => { - Some(WriterFeatures::ChangeDataFeed) + Some(WriterFeature::ChangeDataFeed) } - "delta.appendOnly" if parse_bool(value) => Some(WriterFeatures::AppendOnly), + "delta.appendOnly" if parse_bool(value) => Some(WriterFeature::AppendOnly), "delta.enableDeletionVectors" if parse_bool(value) => { - Some(WriterFeatures::DeletionVectors) + Some(WriterFeature::DeletionVectors) } "delta.enableRowTracking" if parse_bool(value) => { - Some(WriterFeatures::RowTracking) + Some(WriterFeature::RowTracking) } "delta.checkpointPolicy" if value.clone().unwrap_or_default() == "v2" => { - Some(WriterFeatures::V2Checkpoint) + Some(WriterFeature::V2Checkpoint) } _ => None, }) - .collect::>(); + .collect::>(); if configuration .keys() .any(|v| v.starts_with("delta.constraints.")) { - converted_writer_features.insert(WriterFeatures::CheckConstraints); + converted_writer_features.insert(WriterFeature::CheckConstraints); } match self.writer_features { @@ -271,14 +271,14 @@ impl Protocol { .iter() .filter_map(|(key, value)| match key.as_str() { "delta.enableDeletionVectors" if parse_bool(value) => { - Some(ReaderFeatures::DeletionVectors) + Some(ReaderFeature::DeletionVectors) } "delta.checkpointPolicy" if value.clone().unwrap_or_default() == "v2" => { - Some(ReaderFeatures::V2Checkpoint) + Some(ReaderFeature::V2Checkpoint) } _ => None, }) - .collect::>(); + .collect::>(); match self.reader_features { Some(mut features) => { features.extend(converted_reader_features); @@ -382,12 +382,12 @@ impl Protocol { if self.min_writer_version >= 7 { match self.writer_features { Some(mut features) => { - features.insert(WriterFeatures::ChangeDataFeed); + features.insert(WriterFeature::ChangeDataFeed); self.writer_features = Some(features); } None => { self.writer_features = - Some(hashset! {WriterFeatures::ChangeDataFeed}) + Some(hashset! {WriterFeature::ChangeDataFeed}) } } } else if self.min_writer_version <= 3 { @@ -407,17 +407,17 @@ impl Protocol { Ok(true) => { let writer_features = match self.writer_features { Some(mut features) => { - features.insert(WriterFeatures::DeletionVectors); + features.insert(WriterFeature::DeletionVectors); features } - None => hashset! {WriterFeatures::DeletionVectors}, + None => hashset! {WriterFeature::DeletionVectors}, }; let reader_features = match self.reader_features { Some(mut features) => { - features.insert(ReaderFeatures::DeletionVectors); + features.insert(ReaderFeature::DeletionVectors); features } - None => hashset! {ReaderFeatures::DeletionVectors}, + None => hashset! {ReaderFeature::DeletionVectors}, }; self.min_reader_version = 3; self.min_writer_version = 7; @@ -440,8 +440,8 @@ impl Protocol { /// Enable timestamp_ntz in the protocol fn enable_timestamp_ntz(mut self) -> Self { - self = self.append_reader_features([ReaderFeatures::TimestampWithoutTimezone]); - self = self.append_writer_features([WriterFeatures::TimestampWithoutTimezone]); + self = self.append_reader_features([ReaderFeature::TimestampWithoutTimezone]); + self = self.append_writer_features([WriterFeature::TimestampWithoutTimezone]); self } @@ -451,7 +451,7 @@ impl Protocol { self.min_writer_version = 4; } if self.min_writer_version >= 7 { - self = self.append_writer_features([WriterFeatures::GeneratedColumns]); + self = self.append_writer_features([WriterFeature::GeneratedColumns]); } self } @@ -459,7 +459,7 @@ impl Protocol { /// Enabled generated columns fn enable_invariants(mut self) -> Self { if self.min_writer_version >= 7 { - self = self.append_writer_features([WriterFeatures::Invariants]); + self = self.append_writer_features([WriterFeature::Invariants]); } self } @@ -547,27 +547,37 @@ impl fmt::Display for TableFeatures { } } -impl TryFrom<&TableFeatures> for ReaderFeatures { +impl TryFrom<&TableFeatures> for ReaderFeature { type Error = strum::ParseError; fn try_from(value: &TableFeatures) -> Result { - ReaderFeatures::try_from(value.as_ref()) + ReaderFeature::try_from(value.as_ref()) } } -impl TryFrom<&TableFeatures> for WriterFeatures { +impl TryFrom<&TableFeatures> for WriterFeature { type Error = strum::ParseError; fn try_from(value: &TableFeatures) -> Result { - WriterFeatures::try_from(value.as_ref()) + WriterFeature::try_from(value.as_ref()) } } impl TableFeatures { /// Convert table feature to respective reader or/and write feature - pub fn to_reader_writer_features(&self) -> (Option, Option) { - let reader_feature = ReaderFeatures::try_from(self).ok(); - let writer_feature = WriterFeatures::try_from(self).ok(); + pub fn to_reader_writer_features(&self) -> (Option, Option) { + let reader_feature = ReaderFeature::try_from(self) + .ok() + .and_then(|feature| match feature { + ReaderFeature::Unknown(_) => None, + _ => Some(feature), + }); + let writer_feature = WriterFeature::try_from(self) + .ok() + .and_then(|feature| match feature { + WriterFeature::Unknown(_) => None, + _ => Some(feature), + }); (reader_feature, writer_feature) } } @@ -1227,14 +1237,14 @@ mod tests { assert_eq!(protocol.min_writer_version, 7); assert_eq!( protocol.reader_features, - Some(hashset! {ReaderFeatures::Unknown("catalogOwned".to_owned())}) + Some(hashset! {ReaderFeature::Unknown("catalogOwned".to_owned())}) ); assert_eq!( protocol.writer_features, Some(hashset! { - WriterFeatures::Unknown("catalogOwned".to_owned()), - WriterFeatures::Invariants, - WriterFeatures::AppendOnly + WriterFeature::Unknown("catalogOwned".to_owned()), + WriterFeature::Invariants, + WriterFeature::AppendOnly }) ); } diff --git a/crates/core/src/kernel/snapshot/log_data.rs b/crates/core/src/kernel/snapshot/log_data.rs index dbe8eff3b6..a2521e955e 100644 --- a/crates/core/src/kernel/snapshot/log_data.rs +++ b/crates/core/src/kernel/snapshot/log_data.rs @@ -565,7 +565,7 @@ mod datafusion { use delta_kernel::engine::arrow_data::ArrowEngineData; use delta_kernel::expressions::Expression; use delta_kernel::schema::{DataType, PrimitiveType}; - use delta_kernel::{ExpressionEvaluator, ExpressionHandler}; + use delta_kernel::{EvaluationHandler, ExpressionEvaluator}; use super::*; use crate::kernel::arrow::extract::{extract_and_cast_opt, extract_column}; @@ -795,7 +795,7 @@ mod datafusion { } else { Expression::column(["add", "stats_parsed", stats_field, &column.name]) }; - let evaluator = ARROW_HANDLER.get_evaluator( + let evaluator = ARROW_HANDLER.new_expression_evaluator( crate::kernel::models::fields::log_schema_ref().clone(), expression, field.data_type().clone(), @@ -867,7 +867,7 @@ mod datafusion { /// Note: the returned array must contain `num_containers()` rows fn row_counts(&self, _column: &Column) -> Option { static ROW_COUNTS_EVAL: LazyLock> = LazyLock::new(|| { - ARROW_HANDLER.get_evaluator( + ARROW_HANDLER.new_expression_evaluator( crate::kernel::models::fields::log_schema_ref().clone(), Expression::column(["add", "stats_parsed", "numRecords"]), DataType::Primitive(PrimitiveType::Long), diff --git a/crates/core/src/kernel/snapshot/log_segment.rs b/crates/core/src/kernel/snapshot/log_segment.rs index 74c43b4837..015317a3ac 100644 --- a/crates/core/src/kernel/snapshot/log_segment.rs +++ b/crates/core/src/kernel/snapshot/log_segment.rs @@ -205,8 +205,6 @@ impl LogSegment { == cfs[1].location.commit_version().unwrap() }); if !is_contiguous { - println!("commit files: {:?}", self.commit_files); - println!("checkpoint files: {:?}", self.checkpoint_files); return Err(DeltaTableError::Generic( "non-contiguous log segment".into(), )); @@ -579,7 +577,7 @@ pub(super) async fn list_log_files( #[cfg(test)] pub(super) mod tests { - use delta_kernel::table_features::{ReaderFeatures, WriterFeatures}; + use delta_kernel::table_features::{ReaderFeature, WriterFeature}; use deltalake_test::utils::*; use maplit::hashset; use tokio::task::JoinHandle; @@ -685,8 +683,8 @@ pub(super) mod tests { let expected = Protocol { min_reader_version: 3, min_writer_version: 7, - reader_features: Some(hashset! {ReaderFeatures::DeletionVectors}), - writer_features: Some(hashset! {WriterFeatures::DeletionVectors}), + reader_features: Some(hashset! {ReaderFeature::DeletionVectors}), + writer_features: Some(hashset! {WriterFeature::DeletionVectors}), }; assert_eq!(protocol, expected); diff --git a/crates/core/src/kernel/snapshot/mod.rs b/crates/core/src/kernel/snapshot/mod.rs index c15ec7c0a2..36c4cd221a 100644 --- a/crates/core/src/kernel/snapshot/mod.rs +++ b/crates/core/src/kernel/snapshot/mod.rs @@ -83,7 +83,6 @@ impl Snapshot { let (metadata, protocol) = (metadata.unwrap(), protocol.unwrap()); let schema = serde_json::from_str(&metadata.schema_string)?; - println!("{:?}", protocol); PROTOCOL.can_read_from_protocol(&protocol)?; Ok(Self { diff --git a/crates/core/src/kernel/snapshot/parse.rs b/crates/core/src/kernel/snapshot/parse.rs index 3bf0ff295a..92d85d46b7 100644 --- a/crates/core/src/kernel/snapshot/parse.rs +++ b/crates/core/src/kernel/snapshot/parse.rs @@ -3,7 +3,7 @@ use arrow_array::{ Array, BooleanArray, Int32Array, Int64Array, ListArray, MapArray, StringArray, StructArray, }; -use delta_kernel::table_features::{ReaderFeatures, WriterFeatures}; +use delta_kernel::table_features::{ReaderFeature, WriterFeature}; use percent_encoding::percent_decode_str; use crate::kernel::arrow::extract::{self as ex, ProvidesColumnByName}; @@ -66,13 +66,13 @@ pub(super) fn read_protocol(batch: &dyn ProvidesColumnByName) -> DeltaResult::try_into(v.as_str())) + .map(|v| TryInto::::try_into(v.as_str())) .filter_map(|v| v.ok()) .collect() }), writer_features: collect_string_list(&maybe_writer_features, idx).map(|v| { v.into_iter() - .map(|v| TryInto::::try_into(v.as_str())) + .map(|v| TryInto::::try_into(v.as_str())) .filter_map(|v| v.ok()) .collect() }), diff --git a/crates/core/src/operations/add_feature.rs b/crates/core/src/operations/add_feature.rs index 31dbb928bf..97777e6b10 100644 --- a/crates/core/src/operations/add_feature.rs +++ b/crates/core/src/operations/add_feature.rs @@ -2,7 +2,7 @@ use std::sync::Arc; -use delta_kernel::table_features::{ReaderFeatures, WriterFeatures}; +use delta_kernel::table_features::{ReaderFeature, WriterFeature}; use futures::future::BoxFuture; use itertools::Itertools; @@ -102,8 +102,8 @@ impl std::future::IntoFuture for AddTableFeatureBuilder { this.pre_execute(operation_id).await?; let (reader_features, writer_features): ( - Vec>, - Vec>, + Vec>, + Vec>, ) = name.iter().map(|v| v.to_reader_writer_features()).unzip(); let reader_features = reader_features.into_iter().flatten().collect_vec(); let writer_features = writer_features.into_iter().flatten().collect_vec(); @@ -157,7 +157,7 @@ mod tests { writer::test_utils::{create_bare_table, get_record_batch}, DeltaOps, }; - use delta_kernel::table_features::{ReaderFeatures, WriterFeatures}; + use delta_kernel::table_features::{ReaderFeature, WriterFeature}; use delta_kernel::DeltaResult; #[tokio::test] @@ -181,7 +181,7 @@ mod tests { .unwrap() .writer_features .unwrap_or_default() - .contains(&WriterFeatures::ChangeDataFeed)); + .contains(&WriterFeature::ChangeDataFeed)); let result = DeltaOps(result) .add_feature() @@ -195,12 +195,12 @@ mod tests { .writer_features .clone() .unwrap_or_default() - .contains(&WriterFeatures::DeletionVectors)); + .contains(&WriterFeature::DeletionVectors)); assert!(¤t_protocol .reader_features .clone() .unwrap_or_default() - .contains(&ReaderFeatures::DeletionVectors)); + .contains(&ReaderFeature::DeletionVectors)); assert_eq!(result.version(), 2); Ok(()) } diff --git a/crates/core/src/operations/cdc.rs b/crates/core/src/operations/cdc.rs index 5e950402b8..4d985dc216 100644 --- a/crates/core/src/operations/cdc.rs +++ b/crates/core/src/operations/cdc.rs @@ -70,7 +70,7 @@ pub(crate) fn should_write_cdc(snapshot: &DeltaTableState) -> DeltaResult // the Option> can get filled with an empty set, checking for the value // explicitly if snapshot.protocol().min_writer_version == 7 - && !features.contains(&delta_kernel::table_features::WriterFeatures::ChangeDataFeed) + && !features.contains(&delta_kernel::table_features::WriterFeature::ChangeDataFeed) { // If the writer feature has not been set, then the table should not have CDC written // to it. Otherwise fallback to the configured table configuration @@ -95,7 +95,7 @@ mod tests { use arrow_schema::Schema; use datafusion::assert_batches_sorted_eq; use datafusion::datasource::{MemTable, TableProvider}; - use delta_kernel::table_features::WriterFeatures; + use delta_kernel::table_features::WriterFeature; /// A simple test which validates primitive writer version 1 tables should /// not write Change Data Files @@ -175,7 +175,7 @@ mod tests { #[tokio::test] async fn test_should_write_cdc_v7_table_with_writer_feature() { let protocol = - Protocol::new(1, 7).append_writer_features(vec![WriterFeatures::ChangeDataFeed]); + Protocol::new(1, 7).append_writer_features(vec![WriterFeature::ChangeDataFeed]); let actions = vec![Action::Protocol(protocol)]; let mut table: DeltaTable = DeltaOps::new_in_memory() .create() diff --git a/crates/core/src/operations/constraints.rs b/crates/core/src/operations/constraints.rs index 7be5205574..97ea29c38e 100644 --- a/crates/core/src/operations/constraints.rs +++ b/crates/core/src/operations/constraints.rs @@ -21,7 +21,7 @@ use crate::protocol::DeltaOperation; use crate::table::state::DeltaTableState; use crate::table::Constraint; use crate::{DeltaResult, DeltaTable, DeltaTableError}; -use delta_kernel::table_features::WriterFeatures; +use delta_kernel::table_features::WriterFeature; use super::datafusion_utils::into_expr; use super::transaction::{CommitBuilder, CommitProperties}; @@ -201,7 +201,7 @@ impl std::future::IntoFuture for ConstraintBuilder { } else { let current_features = old_protocol.writer_features.clone(); if let Some(mut features) = current_features { - features.insert(WriterFeatures::CheckConstraints); + features.insert(WriterFeature::CheckConstraints); Some(features) } else { current_features diff --git a/crates/core/src/operations/transaction/mod.rs b/crates/core/src/operations/transaction/mod.rs index 031d8e4986..3f055afe06 100644 --- a/crates/core/src/operations/transaction/mod.rs +++ b/crates/core/src/operations/transaction/mod.rs @@ -98,7 +98,7 @@ use crate::storage::ObjectStoreRef; use crate::table::config::TableConfig; use crate::table::state::DeltaTableState; use crate::{crate_version, DeltaResult}; -use delta_kernel::table_features::{ReaderFeatures, WriterFeatures}; +use delta_kernel::table_features::{ReaderFeature, WriterFeature}; use serde::{Deserialize, Serialize}; use super::CustomExecuteHandler; @@ -181,19 +181,19 @@ pub enum TransactionError { /// Error returned when unsupported reader features are required #[error("Unsupported reader features required: {0:?}")] - UnsupportedReaderFeatures(Vec), + UnsupportedReaderFeatures(Vec), /// Error returned when unsupported writer features are required #[error("Unsupported writer features required: {0:?}")] - UnsupportedWriterFeatures(Vec), + UnsupportedWriterFeatures(Vec), /// Error returned when writer features are required but not specified #[error("Writer features must be specified for writerversion >= 7, please specify: {0:?}")] - WriterFeaturesRequired(WriterFeatures), + WriterFeaturesRequired(WriterFeature), /// Error returned when reader features are required but not specified #[error("Reader features must be specified for reader version >= 3, please specify: {0:?}")] - ReaderFeaturesRequired(ReaderFeatures), + ReaderFeaturesRequired(ReaderFeature), /// The transaction failed to commit due to an error in an implementation-specific layer. /// Currently used by DynamoDb-backed S3 log store when database operations fail. @@ -683,7 +683,7 @@ impl<'a> std::future::IntoFuture for PreparedCommit<'a> { } steps -= 1; } - // Update snapshot to latest version after succesful conflict check + // Update snapshot to latest version after successful conflict check read_snapshot .update(this.log_store.clone(), Some(latest_version)) .await?; diff --git a/crates/core/src/operations/transaction/protocol.rs b/crates/core/src/operations/transaction/protocol.rs index 85c6deaea1..b0fd87f16b 100644 --- a/crates/core/src/operations/transaction/protocol.rs +++ b/crates/core/src/operations/transaction/protocol.rs @@ -5,60 +5,60 @@ use super::{TableReference, TransactionError}; use crate::kernel::{contains_timestampntz, Action, EagerSnapshot, Protocol, Schema}; use crate::protocol::DeltaOperation; use crate::table::state::DeltaTableState; -use delta_kernel::table_features::{ReaderFeatures, WriterFeatures}; +use delta_kernel::table_features::{ReaderFeature, WriterFeature}; -static READER_V2: LazyLock> = - LazyLock::new(|| HashSet::from_iter([ReaderFeatures::ColumnMapping])); -static WRITER_V2: LazyLock> = - LazyLock::new(|| HashSet::from_iter([WriterFeatures::AppendOnly, WriterFeatures::Invariants])); -static WRITER_V3: LazyLock> = LazyLock::new(|| { +static READER_V2: LazyLock> = + LazyLock::new(|| HashSet::from_iter([ReaderFeature::ColumnMapping])); +static WRITER_V2: LazyLock> = + LazyLock::new(|| HashSet::from_iter([WriterFeature::AppendOnly, WriterFeature::Invariants])); +static WRITER_V3: LazyLock> = LazyLock::new(|| { HashSet::from_iter([ - WriterFeatures::AppendOnly, - WriterFeatures::Invariants, - WriterFeatures::CheckConstraints, + WriterFeature::AppendOnly, + WriterFeature::Invariants, + WriterFeature::CheckConstraints, ]) }); -static WRITER_V4: LazyLock> = LazyLock::new(|| { +static WRITER_V4: LazyLock> = LazyLock::new(|| { HashSet::from_iter([ - WriterFeatures::AppendOnly, - WriterFeatures::Invariants, - WriterFeatures::CheckConstraints, - WriterFeatures::ChangeDataFeed, - WriterFeatures::GeneratedColumns, + WriterFeature::AppendOnly, + WriterFeature::Invariants, + WriterFeature::CheckConstraints, + WriterFeature::ChangeDataFeed, + WriterFeature::GeneratedColumns, ]) }); -static WRITER_V5: LazyLock> = LazyLock::new(|| { +static WRITER_V5: LazyLock> = LazyLock::new(|| { HashSet::from_iter([ - WriterFeatures::AppendOnly, - WriterFeatures::Invariants, - WriterFeatures::CheckConstraints, - WriterFeatures::ChangeDataFeed, - WriterFeatures::GeneratedColumns, - WriterFeatures::ColumnMapping, + WriterFeature::AppendOnly, + WriterFeature::Invariants, + WriterFeature::CheckConstraints, + WriterFeature::ChangeDataFeed, + WriterFeature::GeneratedColumns, + WriterFeature::ColumnMapping, ]) }); -static WRITER_V6: LazyLock> = LazyLock::new(|| { +static WRITER_V6: LazyLock> = LazyLock::new(|| { HashSet::from_iter([ - WriterFeatures::AppendOnly, - WriterFeatures::Invariants, - WriterFeatures::CheckConstraints, - WriterFeatures::ChangeDataFeed, - WriterFeatures::GeneratedColumns, - WriterFeatures::ColumnMapping, - WriterFeatures::IdentityColumns, + WriterFeature::AppendOnly, + WriterFeature::Invariants, + WriterFeature::CheckConstraints, + WriterFeature::ChangeDataFeed, + WriterFeature::GeneratedColumns, + WriterFeature::ColumnMapping, + WriterFeature::IdentityColumns, ]) }); pub struct ProtocolChecker { - reader_features: HashSet, - writer_features: HashSet, + reader_features: HashSet, + writer_features: HashSet, } impl ProtocolChecker { /// Create a new protocol checker. pub fn new( - reader_features: HashSet, - writer_features: HashSet, + reader_features: HashSet, + writer_features: HashSet, ) -> Self { Self { reader_features, @@ -89,23 +89,23 @@ impl ProtocolChecker { schema: &Schema, ) -> Result<(), TransactionError> { let contains_timestampntz = contains_timestampntz(schema.fields()); - let required_features: Option<&HashSet> = + let required_features: Option<&HashSet> = match snapshot.protocol().min_writer_version { 0..=6 => None, _ => snapshot.protocol().writer_features.as_ref(), }; if let Some(table_features) = required_features { - if !table_features.contains(&WriterFeatures::TimestampWithoutTimezone) + if !table_features.contains(&WriterFeature::TimestampWithoutTimezone) && contains_timestampntz { return Err(TransactionError::WriterFeaturesRequired( - WriterFeatures::TimestampWithoutTimezone, + WriterFeature::TimestampWithoutTimezone, )); } } else if contains_timestampntz { return Err(TransactionError::WriterFeaturesRequired( - WriterFeatures::TimestampWithoutTimezone, + WriterFeature::TimestampWithoutTimezone, )); } Ok(()) @@ -117,13 +117,11 @@ impl ProtocolChecker { } pub fn can_read_from_protocol(&self, protocol: &Protocol) -> Result<(), TransactionError> { - let required_features: Option<&HashSet> = match protocol.min_reader_version - { + let required_features: Option<&HashSet> = match protocol.min_reader_version { 0 | 1 => None, 2 => Some(&READER_V2), _ => protocol.reader_features.as_ref(), }; - println!("required_features: {:?}", required_features); if let Some(features) = required_features { let mut diff = features.difference(&self.reader_features).peekable(); if diff.peek().is_some() { @@ -141,7 +139,7 @@ impl ProtocolChecker { self.can_read_from(snapshot)?; let min_writer_version = snapshot.protocol().min_writer_version; - let required_features: Option<&HashSet> = match min_writer_version { + let required_features: Option<&HashSet> = match min_writer_version { 0 | 1 => None, 2 => Some(&WRITER_V2), 3 => Some(&WRITER_V3), @@ -181,9 +179,9 @@ impl ProtocolChecker { .writer_features .as_ref() .ok_or(TransactionError::WriterFeaturesRequired( - WriterFeatures::AppendOnly, + WriterFeature::AppendOnly, ))? - .contains(&WriterFeatures::AppendOnly) + .contains(&WriterFeature::AppendOnly) && snapshot.config().append_only() }; if append_only_enabled { @@ -213,21 +211,21 @@ impl ProtocolChecker { /// resulting version support is determined by the supported table feature set. pub static INSTANCE: LazyLock = LazyLock::new(|| { let mut reader_features = HashSet::new(); - reader_features.insert(ReaderFeatures::TimestampWithoutTimezone); - // reader_features.insert(ReaderFeatures::ColumnMapping); + reader_features.insert(ReaderFeature::TimestampWithoutTimezone); + // reader_features.insert(ReaderFeature::ColumnMapping); let mut writer_features = HashSet::new(); - writer_features.insert(WriterFeatures::AppendOnly); - writer_features.insert(WriterFeatures::TimestampWithoutTimezone); + writer_features.insert(WriterFeature::AppendOnly); + writer_features.insert(WriterFeature::TimestampWithoutTimezone); #[cfg(feature = "datafusion")] { - writer_features.insert(WriterFeatures::ChangeDataFeed); - writer_features.insert(WriterFeatures::Invariants); - writer_features.insert(WriterFeatures::CheckConstraints); - writer_features.insert(WriterFeatures::GeneratedColumns); + writer_features.insert(WriterFeature::ChangeDataFeed); + writer_features.insert(WriterFeature::Invariants); + writer_features.insert(WriterFeature::CheckConstraints); + writer_features.insert(WriterFeature::GeneratedColumns); } - // writer_features.insert(WriterFeatures::ColumnMapping); - // writer_features.insert(WriterFeatures::IdentityColumns); + // writer_features.insert(WriterFeature::ColumnMapping); + // writer_features.insert(WriterFeature::IdentityColumns); ProtocolChecker::new(reader_features, writer_features) }); @@ -289,7 +287,7 @@ mod tests { ]; let neutral_op = DeltaOperation::Update { predicate: None }; - let create_actions = |writer: i32, append: &str, feat: Vec| { + let create_actions = |writer: i32, append: &str, feat: Vec| { vec![ Action::Protocol(Protocol { min_reader_version: 1, @@ -346,7 +344,7 @@ mod tests { .can_commit(eager, &neutral_actions, &neutral_op) .is_ok()); - let actions = create_actions(7, "true", vec![WriterFeatures::AppendOnly]); + let actions = create_actions(7, "true", vec![WriterFeature::AppendOnly]); let snapshot = DeltaTableState::from_actions(actions).unwrap(); let eager = snapshot.snapshot(); assert!(checker @@ -359,7 +357,7 @@ mod tests { .can_commit(eager, &neutral_actions, &neutral_op) .is_ok()); - let actions = create_actions(7, "false", vec![WriterFeatures::AppendOnly]); + let actions = create_actions(7, "false", vec![WriterFeature::AppendOnly]); let snapshot = DeltaTableState::from_actions(actions).unwrap(); let eager = snapshot.snapshot(); assert!(checker @@ -555,7 +553,7 @@ mod tests { let checker_5 = ProtocolChecker::new(READER_V2.clone(), WRITER_V4.clone()); let actions = vec![ Action::Protocol( - Protocol::new(2, 4).append_writer_features(vec![WriterFeatures::ChangeDataFeed]), + Protocol::new(2, 4).append_writer_features(vec![WriterFeature::ChangeDataFeed]), ), metadata_action(None).into(), ]; @@ -572,7 +570,7 @@ mod tests { let checker_5 = ProtocolChecker::new(READER_V2.clone(), WRITER_V4.clone()); let actions = vec![ Action::Protocol( - Protocol::new(2, 4).append_writer_features([WriterFeatures::GeneratedColumns]), + Protocol::new(2, 4).append_writer_features([WriterFeature::GeneratedColumns]), ), metadata_action(None).into(), ]; diff --git a/crates/core/src/protocol/parquet_read/mod.rs b/crates/core/src/protocol/parquet_read/mod.rs index a5c8379467..6b48e55c1d 100644 --- a/crates/core/src/protocol/parquet_read/mod.rs +++ b/crates/core/src/protocol/parquet_read/mod.rs @@ -1,7 +1,7 @@ use std::{collections::HashMap, str::FromStr}; use chrono::{SecondsFormat, TimeZone, Utc}; -use delta_kernel::table_features::{ReaderFeatures, WriterFeatures}; +use delta_kernel::table_features::{ReaderFeature, WriterFeature}; use num_bigint::BigInt; use num_traits::cast::ToPrimitive; use parquet::record::{Field, ListAccessor, MapAccessor, RowAccessor}; @@ -627,7 +627,7 @@ impl Protocol { .iter() .filter_map(|v| match v { Field::Str(feature) => { - ReaderFeatures::try_from(feature.as_str()).ok() + ReaderFeature::try_from(feature.as_str()).ok() } _ => None, }) @@ -643,7 +643,7 @@ impl Protocol { .iter() .filter_map(|v| match v { Field::Str(feature) => { - WriterFeatures::try_from(feature.as_str()).ok() + WriterFeature::try_from(feature.as_str()).ok() } _ => None, }) diff --git a/crates/core/src/test_utils/factories/actions.rs b/crates/core/src/test_utils/factories/actions.rs index 23f7206d89..bd55b260e1 100644 --- a/crates/core/src/test_utils/factories/actions.rs +++ b/crates/core/src/test_utils/factories/actions.rs @@ -11,7 +11,7 @@ use crate::kernel::arrow::extract::{self as ex}; use crate::kernel::partitions_schema; use crate::kernel::{Add, Metadata, Protocol, Remove, StructType}; use crate::operations::transaction::PROTOCOL; -use delta_kernel::table_features::{ReaderFeatures, WriterFeatures}; +use delta_kernel::table_features::{ReaderFeature, WriterFeature}; pub struct ActionFactory; @@ -101,8 +101,8 @@ impl ActionFactory { pub fn protocol( max_reader: Option, max_writer: Option, - reader_features: Option>, - writer_features: Option>, + reader_features: Option>, + writer_features: Option>, ) -> Protocol { Protocol { min_reader_version: max_reader.unwrap_or(PROTOCOL.default_reader_version()), diff --git a/crates/core/tests/exotic_tables.rs b/crates/core/tests/exotic_tables.rs index 72b4375820..b9b1ea6073 100644 --- a/crates/core/tests/exotic_tables.rs +++ b/crates/core/tests/exotic_tables.rs @@ -1,26 +1,24 @@ #[tokio::test] async fn test_exotic_tables() { let dir = env!("CARGO_MANIFEST_DIR"); - let data_path = std::path::Path::new(dir).join("tests/data/exotic_logs"); + let data_path = std::path::Path::new(dir).join("tests/data_err_logs"); let full = data_path.canonicalize().unwrap(); let cases = vec![ - // ("table_a", false), - // ("table_b", false), - // ("table_c", true), - // ("table_d", true), - // ("table_e", true), - // ("table_f", true), - ("table_g", false), - // ("table_h", true), - // ("table_i", true), + ("table_a", false), + ("table_b", false), + ("table_c", true), + ("table_d", true), + ("table_e", true), + ("table_f", true), + // ("table_g", false), + ("table_h", true), + ("table_i", true), ]; for (name, should_error) in cases { let table_path = full.join(name); - println!("{table_path:?}"); let table = deltalake_core::open_table(&table_path.to_string_lossy()).await; - println!("table: {:?}", table); if should_error { assert!(table.is_err()); } else { From bde3b345cb9d94da6068d238832bf89edbdce49a Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Tue, 8 Apr 2025 23:45:50 +0200 Subject: [PATCH 17/23] test: update or disable tests with unsupported features Signed-off-by: Robert Pack --- Cargo.toml | 3 +-- crates/core/src/operations/transaction/protocol.rs | 4 ++-- crates/core/src/protocol/mod.rs | 1 + crates/core/tests/read_delta_log_test.rs | 4 ++++ python/tests/test_create.py | 7 ++++--- 5 files changed, 12 insertions(+), 7 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 003d054923..05c7fb658d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,8 +26,7 @@ debug = true debug = "line-tables-only" [workspace.dependencies] -# delta_kernel = { version = "0.8.0", features = ["arrow_54", "default-engine-rustls"] } -delta_kernel = { git = "https://github.com/hntd187/delta-kernel-rs", rev = "00d1d9c8169c5cd7901682be2024687f79e101e3", features = [ +delta_kernel = { version = "0.9.0", features = [ "arrow_54", "developer-visibility", "default-engine-rustls", diff --git a/crates/core/src/operations/transaction/protocol.rs b/crates/core/src/operations/transaction/protocol.rs index b0fd87f16b..495638b3c0 100644 --- a/crates/core/src/operations/transaction/protocol.rs +++ b/crates/core/src/operations/transaction/protocol.rs @@ -581,8 +581,8 @@ mod tests { #[tokio::test] async fn test_minwriter_v4_with_generated_columns_and_expressions() { - let checker_5 = ProtocolChecker::new(READER_V2.clone(), WRITER_V4.clone()); - let actions = vec![Action::Protocol(Protocol::new(2, 4))]; + let checker_5 = ProtocolChecker::new(Default::default(), WRITER_V4.clone()); + let actions = vec![Action::Protocol(Protocol::new(1, 4))]; let table: crate::DeltaTable = crate::DeltaOps::new_in_memory() .create() diff --git a/crates/core/src/protocol/mod.rs b/crates/core/src/protocol/mod.rs index 2e3e1de801..37d32505b6 100644 --- a/crates/core/src/protocol/mod.rs +++ b/crates/core/src/protocol/mod.rs @@ -931,6 +931,7 @@ mod tests { } #[tokio::test] + #[ignore = "enable when deletion vector is supported"] async fn test_with_deletion_vector() { // test table with partitions let path = "../test/tests/data/table_with_deletion_logs"; diff --git a/crates/core/tests/read_delta_log_test.rs b/crates/core/tests/read_delta_log_test.rs index a6d2dc8833..2d81b7bc18 100644 --- a/crates/core/tests/read_delta_log_test.rs +++ b/crates/core/tests/read_delta_log_test.rs @@ -144,6 +144,7 @@ async fn test_log_buffering_fail() { } #[tokio::test] +#[ignore = "not implemented"] async fn test_read_liquid_table() -> DeltaResult<()> { let path = "../test/tests/data/table_with_liquid_clustering"; let _table = deltalake_core::open_table(&path).await?; @@ -151,6 +152,7 @@ async fn test_read_liquid_table() -> DeltaResult<()> { } #[tokio::test] +#[ignore = "not implemented"] async fn test_read_table_features() -> DeltaResult<()> { let mut _table = deltalake_core::open_table("../test/tests/data/simple_table_features").await?; let rf = _table.protocol()?.reader_features.clone(); @@ -165,6 +167,7 @@ async fn test_read_table_features() -> DeltaResult<()> { // test for: https://github.com/delta-io/delta-rs/issues/1302 #[tokio::test] +#[ignore = "not implemented"] async fn read_delta_table_from_dlt() { let table = deltalake_core::open_table("../test/tests/data/delta-live-table") .await @@ -184,6 +187,7 @@ async fn read_delta_table_with_null_stats_in_notnull_struct() { } #[tokio::test] +#[ignore = "not implemented"] async fn read_delta_table_with_renamed_partitioning_column() { let table = deltalake_core::open_table("../test/tests/data/table_with_partitioning_mapping") .await diff --git a/python/tests/test_create.py b/python/tests/test_create.py index 3066bedac1..210537953f 100644 --- a/python/tests/test_create.py +++ b/python/tests/test_create.py @@ -64,6 +64,7 @@ def test_create_schema(tmp_path: pathlib.Path, sample_data: pa.Table): assert dt.schema().to_pyarrow() == sample_data.schema +@pytest.mark.skip(reason="not implemented") def test_create_with_deletion_vectors_enabled( tmp_path: pathlib.Path, sample_table: pa.Table ): @@ -105,7 +106,7 @@ def test_create_higher_protocol_versions( description="test_desc", configuration={ "delta.appendOnly": "false", - "delta.minReaderVersion": "2", + "delta.minReaderVersion": "1", "delta.minWriterVersion": "5", }, commit_properties=CommitProperties(custom_metadata={"userName": "John Doe"}), @@ -117,10 +118,10 @@ def test_create_higher_protocol_versions( assert metadata.description == "test_desc" assert metadata.configuration == { "delta.appendOnly": "false", - "delta.minReaderVersion": "2", + "delta.minReaderVersion": "1", "delta.minWriterVersion": "5", } - assert protocol.min_reader_version == 2 + assert protocol.min_reader_version == 1 assert protocol.min_writer_version == 5 assert dt.history()[0]["userName"] == "John Doe" From 98d7c9aa8e40c5ee4dacd71da4d694da865efe62 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Fri, 11 Apr 2025 18:49:00 +0200 Subject: [PATCH 18/23] feat: first round of latest kernel Signed-off-by: Robert Pack --- Cargo.toml | 9 +- crates/core/src/kernel/snapshot_next/eager.rs | 124 ++++--------- .../src/kernel/snapshot_next/iterators.rs | 140 +------------- crates/core/src/kernel/snapshot_next/lazy.rs | 100 +++++----- crates/core/src/kernel/snapshot_next/mod.rs | 171 ++---------------- crates/core/src/operations/restore.rs | 6 +- crates/core/src/storage/mod.rs | 3 - 7 files changed, 99 insertions(+), 454 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c79dec9d27..233057f7d5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,11 +26,16 @@ debug = true debug = "line-tables-only" [workspace.dependencies] -delta_kernel = { git = "https://github.com/delta-io/delta-kernel-rs.git", rev = "8961e9741a7b5dad22dce39cce001349e2a4cb0c", features = [ +delta_kernel = { git = "https://github.com/roeap/delta-kernel-rs", rev = "b0cd12264ae4ada8d51cff02b25864258568eb88", features = [ "arrow_54", "developer-visibility", "default-engine-rustls", ] } +# delta_kernel = { path = "../delta-kernel-rs/kernel", features = [ +# "arrow_54", +# "developer-visibility", +# "default-engine-rustls", +# ] } # arrow arrow = { version = "54" } @@ -62,7 +67,7 @@ datafusion-sql = "46" # serde serde = { version = "1.0.194", features = ["derive"] } serde_json = "1" -strum = { version = "0.27" } +strum = { version = "0.26" } # "stdlib" diff --git a/crates/core/src/kernel/snapshot_next/eager.rs b/crates/core/src/kernel/snapshot_next/eager.rs index 2e8ff24059..8c3b636f2b 100644 --- a/crates/core/src/kernel/snapshot_next/eager.rs +++ b/crates/core/src/kernel/snapshot_next/eager.rs @@ -1,25 +1,22 @@ use std::sync::Arc; -use arrow_array::{BooleanArray, RecordBatch}; -use arrow_select::filter::filter_record_batch; -use delta_kernel::actions::set_transaction::SetTransactionMap; -use delta_kernel::actions::{get_log_add_schema, get_log_schema, ADD_NAME, REMOVE_NAME}; +use arrow::compute::concat_batches; +use arrow_array::RecordBatch; +use delta_kernel::actions::visitors::SetTransactionMap; use delta_kernel::actions::{Add, Metadata, Protocol, SetTransaction}; -use delta_kernel::engine::arrow_data::ArrowEngineData; -use delta_kernel::log_segment::LogSegment; -use delta_kernel::scan::log_replay::scan_action_iter; +use delta_kernel::engine::arrow_extensions::ScanExt; use delta_kernel::schema::Schema; use delta_kernel::table_properties::TableProperties; -use delta_kernel::{EngineData, ExpressionRef, Table, Version}; +use delta_kernel::{ExpressionRef, Table, Version}; use itertools::Itertools; use object_store::ObjectStore; use url::Url; use super::iterators::AddIterator; use super::lazy::LazySnapshot; -use super::{replay_file_actions, scan_as_log_data, Snapshot, SnapshotError}; +use super::{Snapshot, SnapshotError}; use crate::kernel::CommitInfo; -use crate::{DeltaResult, DeltaTableConfig, DeltaTableError}; +use crate::{DeltaResult, DeltaTableConfig}; /// An eager snapshot of a Delta Table at a specific version. /// @@ -39,7 +36,7 @@ impl Snapshot for EagerSnapshot { self.snapshot.version() } - fn schema(&self) -> &Schema { + fn schema(&self) -> Arc { self.snapshot.schema() } @@ -58,46 +55,17 @@ impl Snapshot for EagerSnapshot { fn logical_files( &self, predicate: Option, - ) -> DeltaResult>>> { - let scan = self - .snapshot - .inner - .as_ref() - .clone() - .into_scan_builder() - .with_predicate(predicate) - .build()?; - - let iter = scan_action_iter( - self.snapshot.engine_ref().as_ref(), - vec![Ok(( - Box::new(ArrowEngineData::new(self.file_data()?.clone())) as Box, - false, - ))] - .into_iter(), - scan.physical_predicate() - .map(|p| (p, scan.schema().clone())), - ) - .map(|res| { - res.and_then(|(data, predicate)| { - let batch: RecordBatch = ArrowEngineData::try_from_engine_data(data)?.into(); - Ok(filter_record_batch(&batch, &BooleanArray::from(predicate))?) - }) - }) - .map(|batch| batch.map_err(|e| e.into())); - - Ok(Box::new(iter)) - } - - fn files( - &self, - predicate: Option, - ) -> DeltaResult>>> { - Ok(Box::new(std::iter::once(scan_as_log_data( - &self.snapshot, - vec![(self.file_data()?.clone(), false)], - predicate, - )))) + ) -> DeltaResult> + '_>> { + let scan = self.snapshot.inner.as_ref().clone(); + let builder = scan.into_scan_builder().with_predicate(predicate).build()?; + let iter: Vec<_> = builder + .scan_metadata_from_existing_arrow( + self.snapshot.engine_ref().as_ref(), + self.version(), + Some(self.file_data()?.clone()), + )? + .collect(); + Ok(Box::new(iter.into_iter().map(|sc| Ok(sc?.scan_files)))) } fn tombstones(&self) -> DeltaResult>>> { @@ -137,7 +105,10 @@ impl EagerSnapshot { LazySnapshot::try_new(Table::try_from_uri(table_root)?, store, version).await?; let files = config .require_files - .then(|| -> DeltaResult<_> { replay_file_actions(&snapshot, None) }) + .then(|| -> DeltaResult<_> { + let all: Vec = snapshot.logical_files(None)?.try_collect()?; + Ok(concat_batches(&all[0].schema(), &all)?) + }) .transpose()?; Ok(Self { snapshot, files }) } @@ -164,48 +135,23 @@ impl EagerSnapshot { fn update_impl(&mut self, target_version: Option) -> DeltaResult { let mut snapshot = self.snapshot.clone(); + let current = snapshot.version(); if !snapshot.update(target_version.clone())? { return Ok(false); } - let log_root = snapshot - .table_root() - .join("_delta_log/") - .map_err(|e| DeltaTableError::generic(e))?; - let fs_client = snapshot.engine_ref().get_file_system_client(); - let commit_read_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME])?; - let checkpoint_read_schema = get_log_add_schema().clone(); - - let segment = LogSegment::for_table_changes( - fs_client.as_ref(), - log_root, - self.snapshot.version() + 1, - snapshot.version(), - )?; - let mut slice_iter = segment - .replay( - self.snapshot.engine_ref().as_ref(), - commit_read_schema, - checkpoint_read_schema, - None, + let scan = snapshot.inner.clone().scan_builder().build()?; + let engine = snapshot.engine_ref().clone(); + let files: Vec<_> = scan + .scan_metadata_from_existing_arrow( + engine.as_ref(), + current, + Some(self.file_data()?.clone()), )? - .map_ok( - |(data, flag)| -> Result<(RecordBatch, bool), delta_kernel::Error> { - Ok((ArrowEngineData::try_from_engine_data(data)?.into(), flag)) - }, - ) - .flatten() - .collect::, _>>()?; - - slice_iter.push(( - self.files - .as_ref() - .ok_or(SnapshotError::FilesNotInitialized)? - .clone(), - false, - )); - - self.files = Some(scan_as_log_data(&self.snapshot, slice_iter, None)?); + .map_ok(|s| s.scan_files) + .try_collect()?; + + self.files = Some(concat_batches(&files[0].schema(), &files)?); self.snapshot = snapshot; Ok(true) diff --git a/crates/core/src/kernel/snapshot_next/iterators.rs b/crates/core/src/kernel/snapshot_next/iterators.rs index 1bfec67eec..2d609ed0ee 100644 --- a/crates/core/src/kernel/snapshot_next/iterators.rs +++ b/crates/core/src/kernel/snapshot_next/iterators.rs @@ -28,7 +28,7 @@ impl AddIterator<'_> { pub fn try_new(actions: &RecordBatch) -> DeltaResult> { validate_add(&actions)?; - let visitor = AddVisitor::new(); + let visitor = AddVisitor::default(); let fields = visitor.selected_column_names_and_types(); let mut mask = HashSet::new(); @@ -67,81 +67,6 @@ impl Iterator for AddIterator<'_> { } } -pub struct AddView { - actions: RecordBatch, - index: usize, -} - -impl AddView { - pub fn path(&self) -> &str { - extract_column(&self.actions, &[ADD_NAME, "path"]) - .unwrap() - .as_string::() - .value(self.index) - } - - pub fn size(&self) -> i64 { - extract_column(&self.actions, &[ADD_NAME, "size"]) - .unwrap() - .as_primitive::() - .value(self.index) - } - - pub fn modification_time(&self) -> i64 { - extract_column(&self.actions, &[ADD_NAME, "modificationTime"]) - .unwrap() - .as_primitive::() - .value(self.index) - } - - /// Datetime of the last modification time of the file. - pub fn modification_datetime(&self) -> DeltaResult> { - DateTime::from_timestamp_millis(self.modification_time()).ok_or(DeltaTableError::from( - crate::protocol::ProtocolError::InvalidField(format!( - "invalid modification_time: {:?}", - self.modification_time() - )), - )) - } - - pub fn data_change(&self) -> bool { - extract_column(&self.actions, &[ADD_NAME, "dataChange"]) - .unwrap() - .as_boolean() - .value(self.index) - } - - pub fn stats(&self) -> Option<&str> { - extract_column(&self.actions, &[ADD_NAME, "stats"]) - .ok() - .and_then(|c| c.as_string_opt::().map(|v| v.value(self.index))) - } - - pub fn base_row_id(&self) -> Option { - extract_column(&self.actions, &[ADD_NAME, "baseRowId"]) - .ok() - .and_then(|c| { - c.as_primitive_opt::() - .map(|v| v.value(self.index)) - }) - } - - pub fn default_row_commit_version(&self) -> Option { - extract_column(&self.actions, &[ADD_NAME, "defaultRowCommitVersion"]) - .ok() - .and_then(|c| { - c.as_primitive_opt::() - .map(|v| v.value(self.index)) - }) - } - - pub fn clustering_provider(&self) -> Option<&str> { - extract_column(&self.actions, &[ADD_NAME, "clusteringProvider"]) - .ok() - .and_then(|c| c.as_string_opt::().map(|v| v.value(self.index))) - } -} - #[derive(Clone)] pub struct LogicalFileView { files: RecordBatch, @@ -264,69 +189,6 @@ where } } -pub struct AddViewIterator -where - I: IntoIterator>, -{ - inner: I::IntoIter, - batch: Option, - current: usize, -} - -impl AddViewIterator -where - I: IntoIterator>, -{ - /// Create a new [AddViewIterator]. - /// - /// If `iter` is an infallible iterator, use `.map(Ok)`. - pub fn new(iter: I) -> Self { - Self { - inner: iter.into_iter(), - batch: None, - current: 0, - } - } -} - -impl Iterator for AddViewIterator -where - I: IntoIterator>, -{ - type Item = DeltaResult; - - fn next(&mut self) -> Option { - if let Some(batch) = &self.batch { - if self.current < batch.num_rows() { - let item = AddView { - actions: batch.clone(), - index: self.current, - }; - self.current += 1; - return Some(Ok(item)); - } - } - match self.inner.next() { - Some(Ok(batch)) => { - if validate_add(&batch).is_err() { - return Some(Err(DeltaTableError::generic( - "Invalid add action data encountered.", - ))); - } - self.batch = Some(batch); - self.current = 0; - self.next() - } - Some(Err(e)) => Some(Err(e)), - None => None, - } - } - - fn size_hint(&self) -> (usize, Option) { - self.inner.size_hint() - } -} - pub(crate) fn validate_add(batch: &RecordBatch) -> DeltaResult<()> { validate_column::(batch, &[ADD_NAME, "path"])?; validate_column::(batch, &[ADD_NAME, "size"])?; diff --git a/crates/core/src/kernel/snapshot_next/lazy.rs b/crates/core/src/kernel/snapshot_next/lazy.rs index 3a203e1ad1..41a260c6cd 100644 --- a/crates/core/src/kernel/snapshot_next/lazy.rs +++ b/crates/core/src/kernel/snapshot_next/lazy.rs @@ -3,13 +3,15 @@ use std::io::{BufRead, BufReader, Cursor}; use std::sync::{Arc, LazyLock}; -use arrow_array::{BooleanArray, RecordBatch}; +use arrow::array::AsArray; +use arrow_array::RecordBatch; use arrow_select::filter::filter_record_batch; -use delta_kernel::actions::set_transaction::{SetTransactionMap, SetTransactionScanner}; -use delta_kernel::actions::{get_log_schema, REMOVE_NAME}; +use delta_kernel::actions::set_transaction::SetTransactionScanner; +use delta_kernel::actions::visitors::SetTransactionMap; +use delta_kernel::actions::{get_log_schema, REMOVE_NAME, SIDECAR_NAME}; use delta_kernel::actions::{Metadata, Protocol, SetTransaction}; use delta_kernel::engine::arrow_data::ArrowEngineData; -use delta_kernel::engine::arrow_expression::evaluate_expression; +use delta_kernel::engine::arrow_extensions::{ExpressionEvaluatorExt, ScanExt}; use delta_kernel::engine::default::executor::tokio::{ TokioBackgroundExecutor, TokioMultiThreadExecutor, }; @@ -18,14 +20,15 @@ use delta_kernel::log_segment::LogSegment; use delta_kernel::schema::{DataType, Schema}; use delta_kernel::snapshot::Snapshot as SnapshotInner; use delta_kernel::table_properties::TableProperties; -use delta_kernel::{Engine, Expression, ExpressionHandler, ExpressionRef, Table, Version}; +use delta_kernel::{ + Engine, EvaluationHandler, Expression, ExpressionEvaluator, ExpressionRef, Table, Version, +}; use itertools::Itertools; -use object_store::path::Path; use object_store::ObjectStore; use url::Url; use super::cache::CommitCacheObjectStore; -use super::{replay_file_actions, Snapshot}; +use super::Snapshot; use crate::kernel::{Action, CommitInfo, ARROW_HANDLER}; use crate::{DeltaResult, DeltaTableError}; @@ -46,7 +49,7 @@ impl Snapshot for LazySnapshot { self.inner.version() } - fn schema(&self) -> &Schema { + fn schema(&self) -> Arc { self.inner.schema() } @@ -65,61 +68,50 @@ impl Snapshot for LazySnapshot { fn logical_files( &self, predicate: Option, - ) -> DeltaResult>>> { + ) -> DeltaResult> + '_>> { let scan = self .inner .clone() .scan_builder() .with_predicate(predicate) .build()?; - Ok(Box::new( - scan.scan_data(self.engine.as_ref())? - .map(|res| { - res.and_then(|(data, predicate)| { - let batch: RecordBatch = - ArrowEngineData::try_from_engine_data(data)?.into(); - Ok(filter_record_batch(&batch, &BooleanArray::from(predicate))?) - }) - }) - .map(|batch| batch.map_err(|e| e.into())), - )) - } - fn files( - &self, - predicate: Option>, - ) -> DeltaResult>>> { - Ok(Box::new(std::iter::once(replay_file_actions( - &self, predicate, - )))) + // Move scan_metadata_arrow to a separate variable to avoid returning a reference to a local variable + let engine = self.engine.clone(); + let scan_result: Vec<_> = scan + .scan_metadata_arrow(engine.as_ref())? + .map(|sc| Ok(sc?.scan_files)) + .collect(); + Ok(Box::new(scan_result.into_iter())) } fn tombstones(&self) -> DeltaResult>>> { static META_PREDICATE: LazyLock = LazyLock::new(|| Arc::new(Expression::column([REMOVE_NAME, "path"]).is_not_null())); + static EVALUATOR: LazyLock> = LazyLock::new(|| { + ARROW_HANDLER.new_expression_evaluator( + get_log_schema().project(&[REMOVE_NAME]).unwrap(), + META_PREDICATE.as_ref().clone(), + DataType::BOOLEAN, + ) + }); let read_schema = get_log_schema().project(&[REMOVE_NAME])?; + let read_schema2 = get_log_schema().project(&[REMOVE_NAME, SIDECAR_NAME])?; Ok(Box::new( self.inner - ._log_segment() - .replay( + .log_segment() + .read_actions( self.engine.as_ref(), - read_schema.clone(), read_schema, + read_schema2, Some(META_PREDICATE.clone()), )? .map_ok(|(d, _)| { let batch = RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?); - let selection = evaluate_expression( - META_PREDICATE.as_ref(), - &batch, - Some(&DataType::BOOLEAN), - )?; - let filter = selection - .as_any() - .downcast_ref::() - .ok_or_else(|| { - DeltaTableError::generic("failed to downcast to BooleanArray") - })?; + let selection = EVALUATOR.evaluate_arrow(batch.clone())?; + let filter = selection.column(0).as_boolean_opt().ok_or_else(|| { + DeltaTableError::generic("failed to downcast to BooleanArray") + })?; Ok(filter_record_batch(&batch, filter)?) }) .flatten(), @@ -142,7 +134,7 @@ impl Snapshot for LazySnapshot { limit: Option, ) -> DeltaResult>> { // let start_version = start_version.into(); - let fs_client = self.engine.get_file_system_client(); + let fs_client = self.engine.storage_handler(); let end_version = start_version.unwrap_or_else(|| self.version()); let start_version = limit .and_then(|limit| { @@ -191,9 +183,10 @@ impl Snapshot for LazySnapshot { } fn update(&mut self, target_version: Option) -> DeltaResult { - let mut snapshot = self.inner.as_ref().clone(); - let did_update = snapshot.update(target_version, self.engine_ref().as_ref())?; - self.inner = Arc::new(snapshot); + let snapshot = + SnapshotInner::try_new_from(self.inner.clone(), self.engine.as_ref(), target_version)?; + let did_update = snapshot.version() != self.inner.version(); + self.inner = snapshot; Ok(did_update) } } @@ -212,23 +205,16 @@ impl LazySnapshot { ) -> DeltaResult { // TODO: how to deal with the dedicated IO runtime? Would this already be covered by the // object store implementation pass to this? - let table_root = Path::from_url_path(table.location().path())?; - let store_str = format!("{}", store); - let is_local = store_str.starts_with("LocalFileSystem"); let store = Arc::new(CommitCacheObjectStore::new(store)); let handle = tokio::runtime::Handle::current(); let engine: Arc = match handle.runtime_flavor() { - tokio::runtime::RuntimeFlavor::MultiThread => Arc::new(DefaultEngine::new_with_opts( + tokio::runtime::RuntimeFlavor::MultiThread => Arc::new(DefaultEngine::new( store, - table_root, Arc::new(TokioMultiThreadExecutor::new(handle)), - !is_local, )), - tokio::runtime::RuntimeFlavor::CurrentThread => Arc::new(DefaultEngine::new_with_opts( + tokio::runtime::RuntimeFlavor::CurrentThread => Arc::new(DefaultEngine::new( store, - table_root, Arc::new(TokioBackgroundExecutor::new()), - !is_local, )), _ => return Err(DeltaTableError::generic("unsupported runtime flavor")), }; @@ -249,7 +235,7 @@ impl LazySnapshot { /// current log segment, `None` is returned. pub fn version_timestamp(&self, version: Version) -> Option { self.inner - ._log_segment() + .log_segment() .ascending_commit_files .iter() .find(|f| f.version == version) @@ -278,7 +264,7 @@ mod tests { let schema_string = r#"{"type":"struct","fields":[{"name":"id","type":"long","nullable":true,"metadata":{}}]}"#; let expected: StructType = serde_json::from_str(schema_string)?; - assert_eq!(snapshot.schema(), &expected); + assert_eq!(snapshot.schema().as_ref(), &expected); let infos = snapshot.commit_infos(None, None)?.collect_vec(); assert_eq!(infos.len(), 5); diff --git a/crates/core/src/kernel/snapshot_next/mod.rs b/crates/core/src/kernel/snapshot_next/mod.rs index 079ceb0298..a558d84b92 100644 --- a/crates/core/src/kernel/snapshot_next/mod.rs +++ b/crates/core/src/kernel/snapshot_next/mod.rs @@ -2,23 +2,15 @@ use std::sync::Arc; -use arrow_array::{BooleanArray, RecordBatch, StructArray}; -use arrow_select::concat::concat_batches; -use arrow_select::filter::filter_record_batch; +use arrow_array::RecordBatch; use delta_kernel::actions::visitors::SetTransactionMap; -use delta_kernel::actions::{ - get_log_add_schema, get_log_schema, Metadata, Protocol, SetTransaction, ADD_NAME, REMOVE_NAME, -}; -use delta_kernel::engine::arrow_data::ArrowEngineData; -use delta_kernel::engine::arrow_expression::apply_schema; +use delta_kernel::actions::{Metadata, Protocol, SetTransaction}; use delta_kernel::expressions::{Scalar, StructData}; -use delta_kernel::scan::log_replay::scan_action_iter; use delta_kernel::scan::scan_row_schema; -use delta_kernel::schema::{DataType, Schema}; +use delta_kernel::schema::Schema; use delta_kernel::table_properties::TableProperties; -use delta_kernel::{EngineData, ExpressionRef, Version}; -use iterators::{AddView, AddViewIterator, LogicalFileView, LogicalFileViewIterator}; -use itertools::Itertools; +use delta_kernel::{ExpressionRef, Version}; +use iterators::{LogicalFileView, LogicalFileViewIterator}; use url::Url; use crate::kernel::actions::CommitInfo; @@ -75,7 +67,7 @@ pub trait Snapshot { fn version(&self) -> Version; /// Table [`Schema`] at this `Snapshot`s version. - fn schema(&self) -> &Schema; + fn schema(&self) -> Arc; /// Table [`Metadata`] at this `Snapshot`s version. /// @@ -93,7 +85,7 @@ pub trait Snapshot { /// Get the [`TableProperties`] for this [`Snapshot`]. fn table_properties(&self) -> &TableProperties; - fn logical_file_schema(&self) -> &'static Schema { + fn logical_file_schema(&self) -> Schema { scan_row_schema() } @@ -107,46 +99,18 @@ pub trait Snapshot { fn logical_files( &self, predicate: Option, - ) -> DeltaResult>>>; + ) -> DeltaResult> + '_>>; fn logical_files_view( &self, predicate: Option, - ) -> DeltaResult>>> { + ) -> DeltaResult> + '_>> { #[allow(deprecated)] Ok(Box::new(LogicalFileViewIterator::new( self.logical_files(predicate)?, ))) } - /// Get all currently active files in the table. - /// - /// # Parameters - /// - `predicate`: An optional predicate to filter the files based on file statistics. - /// - /// # Returns - /// An iterator of [`RecordBatch`]es, where each batch contains add action data. - #[deprecated( - since = "0.25.0", - note = "Use `logical_files` instead, which returns a more focussed dataset and avoids computational overhead." - )] - fn files( - &self, - predicate: Option, - ) -> DeltaResult>>>; - - #[deprecated( - since = "0.25.0", - note = "Use `logical_files_view` instead, which returns a more focussed dataset and avoids computational overhead." - )] - fn files_view( - &self, - predicate: Option, - ) -> DeltaResult>>> { - #[allow(deprecated)] - Ok(Box::new(AddViewIterator::new(self.files(predicate)?))) - } - /// Get all tombstones in the table. /// /// Remove Actions (tombstones) are records that indicate that a file has been deleted. @@ -220,7 +184,7 @@ impl Snapshot for Box { self.as_ref().version() } - fn schema(&self) -> &Schema { + fn schema(&self) -> Arc { self.as_ref().schema() } @@ -239,18 +203,10 @@ impl Snapshot for Box { fn logical_files( &self, predicate: Option, - ) -> DeltaResult>>> { + ) -> DeltaResult> + '_>> { self.as_ref().logical_files(predicate) } - fn files( - &self, - predicate: Option, - ) -> DeltaResult>>> { - #[allow(deprecated)] - self.as_ref().files(predicate) - } - fn tombstones(&self) -> DeltaResult>>> { self.as_ref().tombstones() } @@ -276,89 +232,6 @@ impl Snapshot for Box { } } -fn replay_file_actions( - snapshot: &LazySnapshot, - predicate: impl Into>, -) -> DeltaResult { - let commit_read_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME])?; - let checkpoint_read_schema = get_log_add_schema().clone(); - - let curr_data = snapshot - .inner - ._log_segment() - .replay( - snapshot.engine_ref().as_ref(), - commit_read_schema.clone(), - checkpoint_read_schema.clone(), - None, - )? - .map_ok( - |(data, flag)| -> Result<(RecordBatch, bool), delta_kernel::Error> { - Ok((ArrowEngineData::try_from_engine_data(data)?.into(), flag)) - }, - ) - .flatten() - .collect::, _>>()?; - - scan_as_log_data(snapshot, curr_data, predicate) -} - -// helper function to replay log data as stored using kernel log replay. -// The kernel replay usually emits a tuple of (data, selection) where data is the -// data is a re-ordered subset of the full data in the log which is relevant to the -// engine. this function leverages the replay, but applies the selection to the -// original data to get the final data. -fn scan_as_log_data( - snapshot: &LazySnapshot, - curr_data: impl IntoIterator, - predicate: impl Into>, -) -> Result { - let curr_data = curr_data.into_iter().collect::>(); - let scan_iter = curr_data.clone().into_iter().map(|(data, flag)| { - Ok(( - Box::new(ArrowEngineData::new(data.clone())) as Box, - flag, - )) - }); - - let scan = snapshot - .inner - .as_ref() - .clone() - .into_scan_builder() - .with_predicate(predicate) - .build()?; - - let res = scan_action_iter( - snapshot.engine_ref().as_ref(), - scan_iter, - scan.physical_predicate() - .map(|p| (p, scan.schema().clone())), - ) - .map(|res| { - res.and_then(|(d, selection)| { - Ok(( - RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?), - selection, - )) - }) - }) - .zip(curr_data.into_iter()) - .map(|(scan_res, (data_raw, _))| match scan_res { - Ok((_, selection)) => { - let data = filter_record_batch(&data_raw, &BooleanArray::from(selection))?; - let dt: DataType = get_log_add_schema().as_ref().clone().into(); - let data: StructArray = data.project(&[0])?.into(); - apply_schema(&data, &dt) - } - Err(e) => Err(e), - }) - .collect::, _>>()?; - - let schema_ref = Arc::new(get_log_add_schema().as_ref().try_into()?); - Ok(concat_batches(&schema_ref, &res)?) -} - #[cfg(test)] mod tests { use std::{future::Future, pin::Pin}; @@ -422,8 +295,6 @@ mod tests { let snapshot = get_snapshot(ctx, TestTables::Checkpoints, Some(version))?.await?; assert_eq!(snapshot.version(), version); - test_files(snapshot.as_ref())?; - test_files_view(snapshot.as_ref())?; test_commit_infos(snapshot.as_ref())?; test_logical_files(snapshot.as_ref())?; test_logical_files_view(snapshot.as_ref())?; @@ -434,8 +305,6 @@ mod tests { snapshot.update(Some(version))?; assert_eq!(snapshot.version(), version); - test_files(snapshot.as_ref())?; - test_files_view(snapshot.as_ref())?; test_commit_infos(snapshot.as_ref())?; test_logical_files(snapshot.as_ref())?; test_logical_files_view(snapshot.as_ref())?; @@ -465,24 +334,6 @@ mod tests { Ok(()) } - fn test_files(snapshot: &dyn Snapshot) -> TestResult<()> { - #[allow(deprecated)] - let batches = snapshot.files(None)?.collect::, _>>()?; - let num_files = batches.iter().map(|b| b.num_rows() as i64).sum::(); - assert_eq!((num_files as u64), snapshot.version()); - Ok(()) - } - - fn test_files_view(snapshot: &dyn Snapshot) -> TestResult<()> { - #[allow(deprecated)] - let num_files_view = snapshot - .files_view(None)? - .map(|f| f.unwrap().path().to_string()) - .count() as u64; - assert_eq!(num_files_view, snapshot.version()); - Ok(()) - } - fn test_commit_infos(snapshot: &dyn Snapshot) -> TestResult<()> { let commit_infos = snapshot.commit_infos(None, Some(100))?.collect::>(); assert_eq!((commit_infos.len() as u64), snapshot.version() + 1); diff --git a/crates/core/src/operations/restore.rs b/crates/core/src/operations/restore.rs index e452110927..49a19b7eba 100644 --- a/crates/core/src/operations/restore.rs +++ b/crates/core/src/operations/restore.rs @@ -369,10 +369,8 @@ impl std::future::IntoFuture for RestoreBuilder { #[cfg(test)] mod tests { - use super::*; - - use crate::writer::test_utils::{create_bare_table, get_arrow_schema, get_record_batch}; - use crate::{DeltaOps, DeltaResult, DeltaTable}; + use crate::writer::test_utils::{create_bare_table, get_record_batch}; + use crate::{DeltaOps, DeltaResult}; /// Verify that restore respects constraints that were added/removed in previous version_to_restore /// diff --git a/crates/core/src/storage/mod.rs b/crates/core/src/storage/mod.rs index e8245fcbcc..c7b4ed6970 100644 --- a/crates/core/src/storage/mod.rs +++ b/crates/core/src/storage/mod.rs @@ -615,8 +615,6 @@ pub mod storage_constants { mod tests { use super::*; - use std::time::Duration; - #[test] fn test_url_prefix_handler() { let store = InMemory::new(); @@ -647,7 +645,6 @@ mod tests { ); } - #[cfg(feature = "cloud")] #[test] #[cfg(feature = "cloud")] fn test_retry_config_from_options() { From e279555142e075cb407bf781c52ffe6c040a4ab8 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Fri, 11 Apr 2025 18:51:23 +0200 Subject: [PATCH 19/23] fix: MRSV Signed-off-by: Robert Pack --- .github/workflows/build.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c44ebe1d9e..aa634cb789 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -25,7 +25,7 @@ jobs: uses: actions-rs/toolchain@v1 with: profile: default - toolchain: '1.82' + toolchain: "1.82" override: true - name: Build @@ -40,7 +40,7 @@ jobs: uses: actions-rs/toolchain@v1 with: profile: default - toolchain: "1.81" + toolchain: "1.82" override: true - name: Format @@ -62,7 +62,7 @@ jobs: uses: actions-rs/toolchain@v1 with: profile: default - toolchain: "1.81" + toolchain: "1.82" override: true - name: build and lint with clippy @@ -92,7 +92,7 @@ jobs: uses: actions-rs/toolchain@v1 with: profile: default - toolchain: "1.81" + toolchain: "1.82" override: true - name: Load DAT data @@ -124,7 +124,7 @@ jobs: uses: actions-rs/toolchain@v1 with: profile: default - toolchain: "1.81" + toolchain: "1.82" override: true # Install Java and Hadoop for HDFS integration tests @@ -168,7 +168,7 @@ jobs: uses: actions-rs/toolchain@v1 with: profile: default - toolchain: "1.81" + toolchain: "1.82" override: true - name: Download Lakectl From 7004eb74e79b782ec8146b147da0749cede6ee88 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Fri, 11 Apr 2025 19:01:38 +0200 Subject: [PATCH 20/23] fix: eager test Signed-off-by: Robert Pack --- crates/core/src/kernel/snapshot_next/eager.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/crates/core/src/kernel/snapshot_next/eager.rs b/crates/core/src/kernel/snapshot_next/eager.rs index 8c3b636f2b..96866ea642 100644 --- a/crates/core/src/kernel/snapshot_next/eager.rs +++ b/crates/core/src/kernel/snapshot_next/eager.rs @@ -5,6 +5,7 @@ use arrow_array::RecordBatch; use delta_kernel::actions::visitors::SetTransactionMap; use delta_kernel::actions::{Add, Metadata, Protocol, SetTransaction}; use delta_kernel::engine::arrow_extensions::ScanExt; +use delta_kernel::scan::scan_row_schema; use delta_kernel::schema::Schema; use delta_kernel::table_properties::TableProperties; use delta_kernel::{ExpressionRef, Table, Version}; @@ -107,6 +108,11 @@ impl EagerSnapshot { .require_files .then(|| -> DeltaResult<_> { let all: Vec = snapshot.logical_files(None)?.try_collect()?; + if all.is_empty() { + return Ok(RecordBatch::new_empty(Arc::new( + (&scan_row_schema()).try_into()?, + ))); + } Ok(concat_batches(&all[0].schema(), &all)?) }) .transpose()?; From 15afe77fedd57fe8d0044b2597600c22e968778a Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Fri, 11 Apr 2025 21:13:37 +0200 Subject: [PATCH 21/23] chore: clippy Signed-off-by: Robert Pack --- crates/catalog-unity/src/datafusion.rs | 2 +- crates/catalog-unity/src/lib.rs | 3 +- crates/core/src/operations/create.rs | 2 +- crates/core/src/operations/optimize.rs | 20 +++---- crates/core/src/operations/restore.rs | 7 +-- crates/core/src/protocol/parquet_read/mod.rs | 7 +-- crates/core/src/storage/mod.rs | 3 - crates/core/src/writer/stats.rs | 60 ++++++++++---------- python/src/error.rs | 9 ++- python/src/schema.rs | 2 +- 10 files changed, 51 insertions(+), 64 deletions(-) diff --git a/crates/catalog-unity/src/datafusion.rs b/crates/catalog-unity/src/datafusion.rs index 41486b979b..0b07bded5d 100644 --- a/crates/catalog-unity/src/datafusion.rs +++ b/crates/catalog-unity/src/datafusion.rs @@ -162,7 +162,7 @@ impl UnitySchemaProvider { { ListTableSummariesResponse::Success { tables, .. } => tables .into_iter() - .filter_map(|t| t.full_name.split('.').last().map(|n| n.into())) + .filter_map(|t| t.full_name.split('.').next_back().map(|n| n.into())) .collect(), ListTableSummariesResponse::Error(_) => vec![], }; diff --git a/crates/catalog-unity/src/lib.rs b/crates/catalog-unity/src/lib.rs index 5d76c98a8b..63eae686d4 100644 --- a/crates/catalog-unity/src/lib.rs +++ b/crates/catalog-unity/src/lib.rs @@ -845,8 +845,7 @@ impl ObjectStoreFactory for UnityCatalogFactory { ) -> DeltaResult<(ObjectStoreRef, Path)> { let (table_path, temp_creds) = UnityCatalogBuilder::execute_uc_future( UnityCatalogBuilder::get_uc_location_and_token(table_uri.as_str()), - )? - .map_err(UnityCatalogError::from)?; + )??; let mut storage_options = options.0.clone(); storage_options.extend(temp_creds); diff --git a/crates/core/src/operations/create.rs b/crates/core/src/operations/create.rs index 090b116637..6354f8ea81 100644 --- a/crates/core/src/operations/create.rs +++ b/crates/core/src/operations/create.rs @@ -144,7 +144,7 @@ impl CreateBuilder { if let Value::Number(n) = v { n.as_i64().map_or_else( || MetadataValue::String(v.to_string()), - |i| MetadataValue::Number(i), + MetadataValue::Number, ) } else { MetadataValue::String(v.to_string()) diff --git a/crates/core/src/operations/optimize.rs b/crates/core/src/operations/optimize.rs index 503e09ffcb..8b90b35bfe 100644 --- a/crates/core/src/operations/optimize.rs +++ b/crates/core/src/operations/optimize.rs @@ -485,11 +485,7 @@ impl MergePlan { let mut partial_actions = files .iter() .map(|file_meta| { - create_remove( - file_meta.path.as_ref(), - &partition_values, - file_meta.size as i64, - ) + create_remove(file_meta.path.as_ref(), &partition_values, file_meta.size) }) .collect::, DeltaTableError>>()?; @@ -497,9 +493,9 @@ impl MergePlan { .iter() .fold(MetricDetails::default(), |mut curr, file| { curr.total_files += 1; - curr.total_size += file.size as i64; - curr.max = std::cmp::max(curr.max, file.size as i64); - curr.min = std::cmp::min(curr.min, file.size as i64); + curr.total_size += file.size; + curr.max = std::cmp::max(curr.max, file.size); + curr.min = std::cmp::min(curr.min, file.size); curr }); @@ -538,7 +534,7 @@ impl MergePlan { true, )?; partial_metrics.num_batches += 1; - writer.write(&batch).await.map_err(DeltaTableError::from)?; + writer.write(&batch).await?; } let add_actions = writer.close().await?.into_iter().map(|mut add| { @@ -670,7 +666,7 @@ impl MergePlan { let scan_config = DeltaScanConfigBuilder::default() .with_file_column(false) .with_schema(snapshot.input_schema()?) - .build(&snapshot)?; + .build(snapshot)?; // For each rewrite evaluate the predicate and then modify each expression // to either compute the new value or obtain the old one then write these batches @@ -855,7 +851,7 @@ impl MergeBin { } fn add(&mut self, add: Add) { - self.size_bytes += add.size as i64; + self.size_bytes += add.size; self.files.push(add); } @@ -913,7 +909,7 @@ fn build_compaction_plan( 'files: for file in files { for bin in merge_bins.iter_mut() { - if bin.total_file_size() + file.size as i64 <= target_size { + if bin.total_file_size() + file.size <= target_size { bin.add(file); // Move to next file continue 'files; diff --git a/crates/core/src/operations/restore.rs b/crates/core/src/operations/restore.rs index e452110927..597b1bf74a 100644 --- a/crates/core/src/operations/restore.rs +++ b/crates/core/src/operations/restore.rs @@ -369,10 +369,9 @@ impl std::future::IntoFuture for RestoreBuilder { #[cfg(test)] mod tests { - use super::*; - use crate::writer::test_utils::{create_bare_table, get_arrow_schema, get_record_batch}; - use crate::{DeltaOps, DeltaResult, DeltaTable}; + use crate::writer::test_utils::{create_bare_table, get_record_batch}; + use crate::{DeltaOps, DeltaResult}; /// Verify that restore respects constraints that were added/removed in previous version_to_restore /// @@ -406,7 +405,7 @@ mod tests { assert_ne!(table.version(), first_v); let constraints = table.state.unwrap().table_config().get_constraints(); - assert!(constraints.len() == 0); + assert!(constraints.is_empty()); Ok(()) } diff --git a/crates/core/src/protocol/parquet_read/mod.rs b/crates/core/src/protocol/parquet_read/mod.rs index 6b48e55c1d..f643486583 100644 --- a/crates/core/src/protocol/parquet_read/mod.rs +++ b/crates/core/src/protocol/parquet_read/mod.rs @@ -73,12 +73,7 @@ impl DeletionVectorDescriptor { })? .clone(); } - "offset" => { - re.offset = match record.get_int(i) { - Ok(x) => Some(x), - _ => None, - } - } + "offset" => re.offset = record.get_int(i).ok(), "sizeInBytes" => { re.size_in_bytes = record.get_int(i).map_err(|_| { gen_action_type_error("add", "deletionVector.sizeInBytes", "int") diff --git a/crates/core/src/storage/mod.rs b/crates/core/src/storage/mod.rs index e8245fcbcc..9eb54ee02c 100644 --- a/crates/core/src/storage/mod.rs +++ b/crates/core/src/storage/mod.rs @@ -615,8 +615,6 @@ pub mod storage_constants { mod tests { use super::*; - use std::time::Duration; - #[test] fn test_url_prefix_handler() { let store = InMemory::new(); @@ -651,7 +649,6 @@ mod tests { #[test] #[cfg(feature = "cloud")] fn test_retry_config_from_options() { - use maplit::hashmap; use std::time::Duration; struct TestFactory {} diff --git a/crates/core/src/writer/stats.rs b/crates/core/src/writer/stats.rs index 7859f27121..671beb6a04 100644 --- a/crates/core/src/writer/stats.rs +++ b/crates/core/src/writer/stats.rs @@ -1071,28 +1071,30 @@ mod tests { .join("\n") }); static JSON_ROWS: LazyLock> = LazyLock::new(|| { - std::iter::repeat(json!({ - "meta": { - "kafka": { - "offset": 0, - "partition": 0, - "topic": "some_topic" - }, - "producer": { - "timestamp": "2021-06-22" + std::iter::repeat_n( + json!({ + "meta": { + "kafka": { + "offset": 0, + "partition": 0, + "topic": "some_topic" + }, + "producer": { + "timestamp": "2021-06-22" + }, }, - }, - "some_string": "GET", - "some_int": 302, - "some_bool": true, - "some_list": ["a", "b", "c"], - "some_nested_list": [[42], [84]], - "date": "2021-06-22", - "uuid": "176c770d-92af-4a21-bf76-5d8c5261d659", - })) - .take(100) - .chain( - std::iter::repeat(json!({ + "some_string": "GET", + "some_int": 302, + "some_bool": true, + "some_list": ["a", "b", "c"], + "some_nested_list": [[42], [84]], + "date": "2021-06-22", + "uuid": "176c770d-92af-4a21-bf76-5d8c5261d659", + }), + 100, + ) + .chain(std::iter::repeat_n( + json!({ "meta": { "kafka": { "offset": 100, @@ -1110,11 +1112,11 @@ mod tests { "some_nested_list": [[42], [84]], "date": "2021-06-22", "uuid": "54f3e867-3f7b-4122-a452-9d74fb4fe1ba", - })) - .take(100), - ) - .chain( - std::iter::repeat(json!({ + }), + 100, + )) + .chain(std::iter::repeat_n( + json!({ "meta": { "kafka": { "offset": 0, @@ -1128,9 +1130,9 @@ mod tests { "some_nested_list": [[42], null], "date": "2021-06-22", "uuid": "a98bea04-d119-4f21-8edc-eb218b5849af", - })) - .take(100), - ) + }), + 100, + )) .collect() }); } diff --git a/python/src/error.rs b/python/src/error.rs index 84809af19b..cca8148aef 100644 --- a/python/src/error.rs +++ b/python/src/error.rs @@ -66,7 +66,7 @@ impl Display for DisplaySourceChain { for err_part in err_msg.split(": ").flat_map(|s| s.split("\ncaused by\n")) { if !err_part.is_empty() && !out_parts.contains(&err_part) - && !out_parts.iter().map(|p| p.contains(&err_part)).any(|v| v) + && !out_parts.iter().any(|p| p.contains(err_part)) { out_parts.push(err_part); } @@ -74,13 +74,12 @@ impl Display for DisplaySourceChain { } for (i, part) in out_parts.iter().enumerate() { if i == 0 { - write!(f, "{}\n", part)?; + writeln!(f, "{}", part)?; } else { - write!( + writeln!( f, - "{}\x1b[31m{}\x1b[0m {}\n", + "{}\x1b[31m↳\x1b[0m {}", " ".repeat(self.error_name.len() + ": ".len() + i), - "↳", part )?; } diff --git a/python/src/schema.rs b/python/src/schema.rs index 5824ba71b9..47d7b43332 100644 --- a/python/src/schema.rs +++ b/python/src/schema.rs @@ -441,7 +441,7 @@ impl Field { match v { serde_json::Value::Number(n) => n.as_i64().map_or_else( || MetadataValue::String(v.to_string()), - |i| MetadataValue::Number(i), + MetadataValue::Number, ), serde_json::Value::String(s) => MetadataValue::String(s.to_string()), other => MetadataValue::String(other.to_string()), From 0a5798a38964fc8fec7ed02dffb5f87d99b20329 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Fri, 11 Apr 2025 21:59:37 +0200 Subject: [PATCH 22/23] refactor: move transaction module to kernel Signed-off-by: Robert Pack --- crates/aws/src/logstore/default_logstore.rs | 2 +- crates/aws/src/logstore/dynamodb_logstore.rs | 2 +- crates/core/src/errors.rs | 2 +- crates/core/src/kernel/mod.rs | 1 + .../core/src/kernel/snapshot/log_segment.rs | 4 +-- crates/core/src/kernel/snapshot/mod.rs | 3 +-- crates/core/src/kernel/snapshot/replay.rs | 2 +- .../transaction/application.rs | 2 +- .../transaction/conflict_checker.rs | 0 .../{operations => kernel}/transaction/mod.rs | 11 ++++---- .../transaction/protocol.rs | 3 ++- .../transaction/state.rs | 0 crates/core/src/logstore/default_logstore.rs | 8 +++--- crates/core/src/logstore/mod.rs | 2 +- crates/core/src/operations/add_column.rs | 2 +- crates/core/src/operations/add_feature.rs | 2 +- crates/core/src/operations/constraints.rs | 9 +++---- .../core/src/operations/convert_to_delta.rs | 5 ++-- crates/core/src/operations/create.rs | 2 +- crates/core/src/operations/delete.rs | 2 +- .../core/src/operations/drop_constraints.rs | 2 +- .../core/src/operations/filesystem_check.rs | 3 +-- crates/core/src/operations/load.rs | 2 +- crates/core/src/operations/merge/mod.rs | 5 +--- crates/core/src/operations/mod.rs | 1 - crates/core/src/operations/optimize.rs | 8 +++--- crates/core/src/operations/restore.rs | 5 ++-- .../core/src/operations/set_tbl_properties.rs | 2 +- crates/core/src/operations/update.rs | 26 +++++++------------ .../src/operations/update_field_metadata.rs | 2 +- crates/core/src/operations/vacuum.rs | 2 +- crates/core/src/operations/write/mod.rs | 2 +- crates/core/src/protocol/checkpoints.rs | 2 +- crates/core/src/table/state.rs | 2 +- .../core/src/test_utils/factories/actions.rs | 2 +- crates/core/src/writer/mod.rs | 2 +- crates/core/tests/command_merge.rs | 2 +- crates/core/tests/command_optimize.rs | 2 +- crates/core/tests/commit_info_format.rs | 2 +- crates/core/tests/fs_common/mod.rs | 2 +- crates/lakefs/src/client.rs | 2 +- crates/lakefs/src/errors.rs | 2 +- crates/lakefs/src/logstore.rs | 4 +-- crates/test/src/concurrent.rs | 2 +- crates/test/src/lib.rs | 2 +- python/src/lib.rs | 4 +-- 46 files changed, 69 insertions(+), 87 deletions(-) rename crates/core/src/{operations => kernel}/transaction/application.rs (97%) rename crates/core/src/{operations => kernel}/transaction/conflict_checker.rs (100%) rename crates/core/src/{operations => kernel}/transaction/mod.rs (99%) rename crates/core/src/{operations => kernel}/transaction/protocol.rs (99%) rename crates/core/src/{operations => kernel}/transaction/state.rs (100%) diff --git a/crates/aws/src/logstore/default_logstore.rs b/crates/aws/src/logstore/default_logstore.rs index 7677010f15..0a73bd33b7 100644 --- a/crates/aws/src/logstore/default_logstore.rs +++ b/crates/aws/src/logstore/default_logstore.rs @@ -5,7 +5,7 @@ use std::sync::Arc; use bytes::Bytes; use deltalake_core::logstore::*; use deltalake_core::{ - operations::transaction::TransactionError, + kernel::transaction::TransactionError, storage::{ObjectStoreRef, StorageOptions}, DeltaResult, }; diff --git a/crates/aws/src/logstore/dynamodb_logstore.rs b/crates/aws/src/logstore/dynamodb_logstore.rs index f5680e091c..69e282f282 100644 --- a/crates/aws/src/logstore/dynamodb_logstore.rs +++ b/crates/aws/src/logstore/dynamodb_logstore.rs @@ -14,7 +14,7 @@ use url::Url; use deltalake_core::logstore::*; use deltalake_core::{ - operations::transaction::TransactionError, + kernel::transaction::TransactionError, storage::{ObjectStoreRef, StorageOptions}, DeltaResult, DeltaTableError, }; diff --git a/crates/core/src/errors.rs b/crates/core/src/errors.rs index 9980cf23ad..c97d753408 100644 --- a/crates/core/src/errors.rs +++ b/crates/core/src/errors.rs @@ -2,7 +2,7 @@ use chrono::{DateTime, Utc}; use object_store::Error as ObjectStoreError; -use crate::operations::transaction::{CommitBuilderError, TransactionError}; +use crate::kernel::transaction::{CommitBuilderError, TransactionError}; use crate::protocol::ProtocolError; /// A result returned by delta-rs diff --git a/crates/core/src/kernel/mod.rs b/crates/core/src/kernel/mod.rs index efac04489d..6a9ba71c94 100644 --- a/crates/core/src/kernel/mod.rs +++ b/crates/core/src/kernel/mod.rs @@ -10,6 +10,7 @@ pub mod error; pub mod models; pub mod scalars; mod snapshot; +pub mod transaction; pub use error::*; pub use models::*; diff --git a/crates/core/src/kernel/snapshot/log_segment.rs b/crates/core/src/kernel/snapshot/log_segment.rs index 015317a3ac..8f663bb41f 100644 --- a/crates/core/src/kernel/snapshot/log_segment.rs +++ b/crates/core/src/kernel/snapshot/log_segment.rs @@ -16,9 +16,9 @@ use serde::{Deserialize, Serialize}; use tracing::debug; use super::parse; +use crate::kernel::transaction::CommitData; use crate::kernel::{arrow::json, ActionType, Metadata, Protocol, Schema, StructType}; use crate::logstore::LogStore; -use crate::operations::transaction::CommitData; use crate::{DeltaResult, DeltaTableConfig, DeltaTableError}; const LAST_CHECKPOINT_FILE_NAME: &str = "_last_checkpoint"; @@ -584,8 +584,8 @@ pub(super) mod tests { use crate::{ checkpoints::{create_checkpoint_for, create_checkpoint_from_table_uri_and_cleanup}, + kernel::transaction::{CommitBuilder, TableReference}, kernel::{Action, Add, Format, Remove}, - operations::transaction::{CommitBuilder, TableReference}, protocol::{DeltaOperation, SaveMode}, DeltaTableBuilder, }; diff --git a/crates/core/src/kernel/snapshot/mod.rs b/crates/core/src/kernel/snapshot/mod.rs index 36c4cd221a..24ca090254 100644 --- a/crates/core/src/kernel/snapshot/mod.rs +++ b/crates/core/src/kernel/snapshot/mod.rs @@ -36,10 +36,9 @@ use super::{ Transaction, }; use crate::kernel::parse::read_cdf_adds; +use crate::kernel::transaction::{CommitData, PROTOCOL}; use crate::kernel::{ActionType, StructType}; use crate::logstore::LogStore; -use crate::operations::transaction::CommitData; -use crate::operations::transaction::PROTOCOL; use crate::table::config::TableConfig; use crate::{DeltaResult, DeltaTableConfig, DeltaTableError}; diff --git a/crates/core/src/kernel/snapshot/replay.rs b/crates/core/src/kernel/snapshot/replay.rs index e7b4149067..cfe812b2f2 100644 --- a/crates/core/src/kernel/snapshot/replay.rs +++ b/crates/core/src/kernel/snapshot/replay.rs @@ -609,8 +609,8 @@ pub(super) mod tests { use super::super::{log_segment::LogSegment, partitions_schema, stats_schema}; use super::*; + use crate::kernel::transaction::CommitData; use crate::kernel::{models::ActionType, StructType}; - use crate::operations::transaction::CommitData; use crate::protocol::DeltaOperation; use crate::table::config::TableConfig; use crate::test_utils::{ActionFactory, TestResult, TestSchemas}; diff --git a/crates/core/src/operations/transaction/application.rs b/crates/core/src/kernel/transaction/application.rs similarity index 97% rename from crates/core/src/operations/transaction/application.rs rename to crates/core/src/kernel/transaction/application.rs index c2f08e8838..1ae9b9d1c8 100644 --- a/crates/core/src/operations/transaction/application.rs +++ b/crates/core/src/kernel/transaction/application.rs @@ -1,7 +1,7 @@ #[cfg(test)] mod tests { use crate::{ - checkpoints, kernel::Transaction, operations::transaction::CommitProperties, + checkpoints, kernel::transaction::CommitProperties, kernel::Transaction, protocol::SaveMode, writer::test_utils::get_record_batch, DeltaOps, DeltaTableBuilder, }; diff --git a/crates/core/src/operations/transaction/conflict_checker.rs b/crates/core/src/kernel/transaction/conflict_checker.rs similarity index 100% rename from crates/core/src/operations/transaction/conflict_checker.rs rename to crates/core/src/kernel/transaction/conflict_checker.rs diff --git a/crates/core/src/operations/transaction/mod.rs b/crates/core/src/kernel/transaction/mod.rs similarity index 99% rename from crates/core/src/operations/transaction/mod.rs rename to crates/core/src/kernel/transaction/mod.rs index 3f055afe06..1908d1dcda 100644 --- a/crates/core/src/operations/transaction/mod.rs +++ b/crates/core/src/kernel/transaction/mod.rs @@ -86,22 +86,23 @@ use serde_json::Value; use tracing::*; use uuid::Uuid; -pub use self::conflict_checker::CommitConflictError; +use delta_kernel::table_features::{ReaderFeature, WriterFeature}; +use serde::{Deserialize, Serialize}; + use self::conflict_checker::{TransactionInfo, WinningCommitSummary}; -pub use self::protocol::INSTANCE as PROTOCOL; use crate::checkpoints::{cleanup_expired_logs_for, create_checkpoint_for}; use crate::errors::DeltaTableError; use crate::kernel::{Action, CommitInfo, EagerSnapshot, Metadata, Protocol, Transaction}; use crate::logstore::{CommitOrBytes, LogStoreRef}; +use crate::operations::CustomExecuteHandler; use crate::protocol::DeltaOperation; use crate::storage::ObjectStoreRef; use crate::table::config::TableConfig; use crate::table::state::DeltaTableState; use crate::{crate_version, DeltaResult}; -use delta_kernel::table_features::{ReaderFeature, WriterFeature}; -use serde::{Deserialize, Serialize}; -use super::CustomExecuteHandler; +pub use self::conflict_checker::CommitConflictError; +pub use self::protocol::INSTANCE as PROTOCOL; #[cfg(test)] pub(crate) mod application; diff --git a/crates/core/src/operations/transaction/protocol.rs b/crates/core/src/kernel/transaction/protocol.rs similarity index 99% rename from crates/core/src/operations/transaction/protocol.rs rename to crates/core/src/kernel/transaction/protocol.rs index 495638b3c0..f5e2f5fbf0 100644 --- a/crates/core/src/operations/transaction/protocol.rs +++ b/crates/core/src/kernel/transaction/protocol.rs @@ -1,11 +1,12 @@ use std::collections::HashSet; use std::sync::LazyLock; +use delta_kernel::table_features::{ReaderFeature, WriterFeature}; + use super::{TableReference, TransactionError}; use crate::kernel::{contains_timestampntz, Action, EagerSnapshot, Protocol, Schema}; use crate::protocol::DeltaOperation; use crate::table::state::DeltaTableState; -use delta_kernel::table_features::{ReaderFeature, WriterFeature}; static READER_V2: LazyLock> = LazyLock::new(|| HashSet::from_iter([ReaderFeature::ColumnMapping])); diff --git a/crates/core/src/operations/transaction/state.rs b/crates/core/src/kernel/transaction/state.rs similarity index 100% rename from crates/core/src/operations/transaction/state.rs rename to crates/core/src/kernel/transaction/state.rs diff --git a/crates/core/src/logstore/default_logstore.rs b/crates/core/src/logstore/default_logstore.rs index e5c3a54e53..f9dad9cccc 100644 --- a/crates/core/src/logstore/default_logstore.rs +++ b/crates/core/src/logstore/default_logstore.rs @@ -7,11 +7,9 @@ use object_store::{Attributes, Error as ObjectStoreError, ObjectStore, PutOption use uuid::Uuid; use super::{CommitOrBytes, LogStore, LogStoreConfig}; -use crate::{ - operations::transaction::TransactionError, - storage::{commit_uri_from_version, ObjectStoreRef}, - DeltaResult, -}; +use crate::kernel::transaction::TransactionError; +use crate::storage::{commit_uri_from_version, ObjectStoreRef}; +use crate::DeltaResult; fn put_options() -> &'static PutOptions { static PUT_OPTS: OnceLock = OnceLock::new(); diff --git a/crates/core/src/logstore/mod.rs b/crates/core/src/logstore/mod.rs index 6a1e826a23..f037d9784f 100644 --- a/crates/core/src/logstore/mod.rs +++ b/crates/core/src/logstore/mod.rs @@ -18,8 +18,8 @@ use url::Url; use uuid::Uuid; use crate::kernel::log_segment::PathExt; +use crate::kernel::transaction::TransactionError; use crate::kernel::Action; -use crate::operations::transaction::TransactionError; use crate::protocol::{get_last_checkpoint, ProtocolError}; use crate::storage::DeltaIOStorageBackend; use crate::storage::{ diff --git a/crates/core/src/operations/add_column.rs b/crates/core/src/operations/add_column.rs index e6646edb9c..75063963ba 100644 --- a/crates/core/src/operations/add_column.rs +++ b/crates/core/src/operations/add_column.rs @@ -6,8 +6,8 @@ use delta_kernel::schema::StructType; use futures::future::BoxFuture; use itertools::Itertools; -use super::transaction::{CommitBuilder, CommitProperties}; use super::{CustomExecuteHandler, Operation}; +use crate::kernel::transaction::{CommitBuilder, CommitProperties}; use crate::kernel::{StructField, StructTypeExt}; use crate::logstore::LogStoreRef; use crate::operations::cast::merge_schema::merge_delta_struct; diff --git a/crates/core/src/operations/add_feature.rs b/crates/core/src/operations/add_feature.rs index 97777e6b10..ddc3a0b531 100644 --- a/crates/core/src/operations/add_feature.rs +++ b/crates/core/src/operations/add_feature.rs @@ -6,8 +6,8 @@ use delta_kernel::table_features::{ReaderFeature, WriterFeature}; use futures::future::BoxFuture; use itertools::Itertools; -use super::transaction::{CommitBuilder, CommitProperties}; use super::{CustomExecuteHandler, Operation}; +use crate::kernel::transaction::{CommitBuilder, CommitProperties}; use crate::kernel::TableFeatures; use crate::logstore::LogStoreRef; use crate::protocol::DeltaOperation; diff --git a/crates/core/src/operations/constraints.rs b/crates/core/src/operations/constraints.rs index 97ea29c38e..be71314faf 100644 --- a/crates/core/src/operations/constraints.rs +++ b/crates/core/src/operations/constraints.rs @@ -7,13 +7,17 @@ use datafusion::execution::{SendableRecordBatchStream, TaskContext}; use datafusion::prelude::SessionContext; use datafusion_common::ToDFSchema; use datafusion_physical_plan::ExecutionPlan; +use delta_kernel::table_features::WriterFeature; use futures::future::BoxFuture; use futures::StreamExt; +use super::datafusion_utils::into_expr; +use super::{CustomExecuteHandler, Operation}; use crate::delta_datafusion::expr::fmt_expr_to_sql; use crate::delta_datafusion::{ register_store, DeltaDataChecker, DeltaScanBuilder, DeltaSessionContext, }; +use crate::kernel::transaction::{CommitBuilder, CommitProperties}; use crate::kernel::Protocol; use crate::logstore::LogStoreRef; use crate::operations::datafusion_utils::Expression; @@ -21,11 +25,6 @@ use crate::protocol::DeltaOperation; use crate::table::state::DeltaTableState; use crate::table::Constraint; use crate::{DeltaResult, DeltaTable, DeltaTableError}; -use delta_kernel::table_features::WriterFeature; - -use super::datafusion_utils::into_expr; -use super::transaction::{CommitBuilder, CommitProperties}; -use super::{CustomExecuteHandler, Operation}; /// Build a constraint to add to a table pub struct ConstraintBuilder { diff --git a/crates/core/src/operations/convert_to_delta.rs b/crates/core/src/operations/convert_to_delta.rs index f4c563fbc5..e6f72d25fe 100644 --- a/crates/core/src/operations/convert_to_delta.rs +++ b/crates/core/src/operations/convert_to_delta.rs @@ -16,6 +16,8 @@ use percent_encoding::percent_decode_str; use tracing::debug; use uuid::Uuid; +use super::{CustomExecuteHandler, Operation}; +use crate::kernel::transaction::CommitProperties; use crate::operations::get_num_idx_cols_and_stats_columns; use crate::{ kernel::{scalars::ScalarExt, Add, DataType, Schema, StructField}, @@ -28,9 +30,6 @@ use crate::{ DeltaResult, DeltaTable, DeltaTableError, ObjectStoreError, NULL_PARTITION_VALUE_DATA_PATH, }; -use super::transaction::CommitProperties; -use super::{CustomExecuteHandler, Operation}; - /// Error converting a Parquet table to a Delta table #[derive(Debug, thiserror::Error)] enum Error { diff --git a/crates/core/src/operations/create.rs b/crates/core/src/operations/create.rs index 090b116637..3a8bbb2bc1 100644 --- a/crates/core/src/operations/create.rs +++ b/crates/core/src/operations/create.rs @@ -10,9 +10,9 @@ use serde_json::Value; use tracing::log::*; use uuid::Uuid; -use super::transaction::{CommitBuilder, CommitProperties, TableReference, PROTOCOL}; use super::{CustomExecuteHandler, Operation}; use crate::errors::{DeltaResult, DeltaTableError}; +use crate::kernel::transaction::{CommitBuilder, CommitProperties, TableReference, PROTOCOL}; use crate::kernel::{Action, DataType, Metadata, Protocol, StructField, StructType}; use crate::logstore::{LogStore, LogStoreRef}; use crate::protocol::{DeltaOperation, SaveMode}; diff --git a/crates/core/src/operations/delete.rs b/crates/core/src/operations/delete.rs index c6d943e1c0..34ffde8a6c 100644 --- a/crates/core/src/operations/delete.rs +++ b/crates/core/src/operations/delete.rs @@ -40,7 +40,6 @@ use serde::Serialize; use super::cdc::should_write_cdc; use super::datafusion_utils::Expression; -use super::transaction::{CommitBuilder, CommitProperties, PROTOCOL}; use super::Operation; use crate::delta_datafusion::expr::fmt_expr_to_sql; use crate::delta_datafusion::logical::MetricObserver; @@ -51,6 +50,7 @@ use crate::delta_datafusion::{ DeltaTableProvider, }; use crate::errors::DeltaResult; +use crate::kernel::transaction::{CommitBuilder, CommitProperties, PROTOCOL}; use crate::kernel::{Action, Add, Remove}; use crate::logstore::LogStoreRef; use crate::operations::write::execution::{write_execution_plan, write_execution_plan_cdc}; diff --git a/crates/core/src/operations/drop_constraints.rs b/crates/core/src/operations/drop_constraints.rs index 6743c3415a..2c362913d6 100644 --- a/crates/core/src/operations/drop_constraints.rs +++ b/crates/core/src/operations/drop_constraints.rs @@ -4,8 +4,8 @@ use std::sync::Arc; use futures::future::BoxFuture; -use super::transaction::{CommitBuilder, CommitProperties}; use super::{CustomExecuteHandler, Operation}; +use crate::kernel::transaction::{CommitBuilder, CommitProperties}; use crate::kernel::Action; use crate::logstore::LogStoreRef; use crate::protocol::DeltaOperation; diff --git a/crates/core/src/operations/filesystem_check.rs b/crates/core/src/operations/filesystem_check.rs index 32591f2812..e6474b285d 100644 --- a/crates/core/src/operations/filesystem_check.rs +++ b/crates/core/src/operations/filesystem_check.rs @@ -20,16 +20,15 @@ use std::time::UNIX_EPOCH; use futures::future::BoxFuture; use futures::StreamExt; -pub use object_store::path::Path; use object_store::ObjectStore; use serde::{de::Error as DeError, Deserialize, Deserializer, Serialize, Serializer}; use url::{ParseError, Url}; use uuid::Uuid; -use super::transaction::{CommitBuilder, CommitProperties}; use super::CustomExecuteHandler; use super::Operation; use crate::errors::{DeltaResult, DeltaTableError}; +use crate::kernel::transaction::{CommitBuilder, CommitProperties}; use crate::kernel::{Action, Add, Remove}; use crate::logstore::LogStoreRef; use crate::protocol::DeltaOperation; diff --git a/crates/core/src/operations/load.rs b/crates/core/src/operations/load.rs index cf7f0c93ca..c52b19f03b 100644 --- a/crates/core/src/operations/load.rs +++ b/crates/core/src/operations/load.rs @@ -6,10 +6,10 @@ use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec; use datafusion::physical_plan::{ExecutionPlan, SendableRecordBatchStream}; use futures::future::BoxFuture; -use super::transaction::PROTOCOL; use super::CustomExecuteHandler; use crate::delta_datafusion::DataFusionMixins; use crate::errors::{DeltaResult, DeltaTableError}; +use crate::kernel::transaction::PROTOCOL; use crate::logstore::LogStoreRef; use crate::table::state::DeltaTableState; use crate::DeltaTable; diff --git a/crates/core/src/operations/merge/mod.rs b/crates/core/src/operations/merge/mod.rs index 782b070856..d14b60c6f3 100644 --- a/crates/core/src/operations/merge/mod.rs +++ b/crates/core/src/operations/merge/mod.rs @@ -66,9 +66,7 @@ use tracing::log::*; use uuid::Uuid; use self::barrier::{MergeBarrier, MergeBarrierExec}; - use super::datafusion_utils::{into_expr, maybe_into_expr, Expression}; -use super::transaction::{CommitProperties, PROTOCOL}; use super::{CustomExecuteHandler, Operation}; use crate::delta_datafusion::expr::{fmt_expr_to_sql, parse_predicate_expression}; use crate::delta_datafusion::logical::MetricObserver; @@ -78,13 +76,12 @@ use crate::delta_datafusion::{ register_store, DataFusionMixins, DeltaColumn, DeltaScan, DeltaScanConfigBuilder, DeltaSessionConfig, DeltaTableProvider, }; - +use crate::kernel::transaction::{CommitBuilder, CommitProperties, PROTOCOL}; use crate::kernel::{Action, Metadata, StructTypeExt}; use crate::logstore::LogStoreRef; use crate::operations::cast::merge_schema::{merge_arrow_field, merge_arrow_schema}; use crate::operations::cdc::*; use crate::operations::merge::barrier::find_node; -use crate::operations::transaction::CommitBuilder; use crate::operations::write::execution::write_execution_plan_v2; use crate::operations::write::generated_columns::{ add_generated_columns, add_missing_generated_columns, diff --git a/crates/core/src/operations/mod.rs b/crates/core/src/operations/mod.rs index 6f541395bf..f17f630d98 100644 --- a/crates/core/src/operations/mod.rs +++ b/crates/core/src/operations/mod.rs @@ -45,7 +45,6 @@ pub mod create; pub mod drop_constraints; pub mod filesystem_check; pub mod restore; -pub mod transaction; pub mod update_field_metadata; pub mod vacuum; diff --git a/crates/core/src/operations/optimize.rs b/crates/core/src/operations/optimize.rs index 503e09ffcb..ba0aa92d9c 100644 --- a/crates/core/src/operations/optimize.rs +++ b/crates/core/src/operations/optimize.rs @@ -42,21 +42,19 @@ use serde::{de::Error as DeError, Deserialize, Deserializer, Serialize, Serializ use tracing::*; use uuid::Uuid; -use super::transaction::PROTOCOL; use super::write::writer::{PartitionWriter, PartitionWriterConfig}; use super::{CustomExecuteHandler, Operation}; +use crate::delta_datafusion::DeltaTableProvider; use crate::errors::{DeltaResult, DeltaTableError}; -use crate::kernel::Add; -use crate::kernel::{scalars::ScalarExt, Action, PartitionsExt, Remove}; +use crate::kernel::transaction::{CommitBuilder, CommitProperties, DEFAULT_RETRIES, PROTOCOL}; +use crate::kernel::{scalars::ScalarExt, Action, Add, PartitionsExt, Remove}; use crate::logstore::LogStoreRef; -use crate::operations::transaction::{CommitBuilder, CommitProperties, DEFAULT_RETRIES}; use crate::protocol::DeltaOperation; use crate::storage::ObjectStoreRef; use crate::table::state::DeltaTableState; use crate::writer::utils::arrow_schema_without_partitions; use crate::{crate_version, DeltaTable, ObjectMeta, PartitionFilter}; -use crate::delta_datafusion::DeltaTableProvider; /// Metrics from Optimize #[derive(Default, Debug, PartialEq, Clone, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] diff --git a/crates/core/src/operations/restore.rs b/crates/core/src/operations/restore.rs index e452110927..ae7d0f2b04 100644 --- a/crates/core/src/operations/restore.rs +++ b/crates/core/src/operations/restore.rs @@ -33,15 +33,14 @@ use object_store::ObjectStore; use serde::Serialize; use uuid::Uuid; +use super::{CustomExecuteHandler, Operation}; +use crate::kernel::transaction::{CommitBuilder, CommitProperties, TransactionError}; use crate::kernel::{Action, Add, Protocol, Remove}; use crate::logstore::LogStoreRef; use crate::protocol::DeltaOperation; use crate::table::state::DeltaTableState; use crate::{DeltaResult, DeltaTable, DeltaTableConfig, DeltaTableError, ObjectStoreError}; -use super::transaction::{CommitBuilder, CommitProperties, TransactionError}; -use super::{CustomExecuteHandler, Operation}; - /// Errors that can occur during restore #[derive(thiserror::Error, Debug)] enum RestoreError { diff --git a/crates/core/src/operations/set_tbl_properties.rs b/crates/core/src/operations/set_tbl_properties.rs index 3e17f8c07c..ccbf120159 100644 --- a/crates/core/src/operations/set_tbl_properties.rs +++ b/crates/core/src/operations/set_tbl_properties.rs @@ -5,8 +5,8 @@ use std::sync::Arc; use futures::future::BoxFuture; -use super::transaction::{CommitBuilder, CommitProperties}; use super::{CustomExecuteHandler, Operation}; +use crate::kernel::transaction::{CommitBuilder, CommitProperties}; use crate::kernel::Action; use crate::logstore::LogStoreRef; use crate::protocol::DeltaOperation; diff --git a/crates/core/src/operations/update.rs b/crates/core/src/operations/update.rs index 7433ef57b2..aaf3173966 100644 --- a/crates/core/src/operations/update.rs +++ b/crates/core/src/operations/update.rs @@ -45,32 +45,26 @@ use serde::Serialize; use tracing::log::*; use uuid::Uuid; -use super::{ - datafusion_utils::Expression, - transaction::{CommitBuilder, CommitProperties}, -}; -use super::{transaction::PROTOCOL, write::WriterStatsConfig}; +use super::datafusion_utils::Expression; +use super::write::WriterStatsConfig; use super::{ write::execution::{write_execution_plan, write_execution_plan_cdc}, CustomExecuteHandler, Operation, }; +use crate::delta_datafusion::{ + expr::fmt_expr_to_sql, + logical::MetricObserver, + physical::{find_metric_node, get_metric, MetricObserverExec}, + DataFusionMixins, DeltaColumn, DeltaScanConfigBuilder, DeltaSessionContext, DeltaTableProvider, +}; use crate::delta_datafusion::{find_files, planner::DeltaPlanner, register_store}; +use crate::kernel::transaction::{CommitBuilder, CommitProperties, PROTOCOL}; use crate::kernel::{Action, Remove}; use crate::logstore::LogStoreRef; use crate::operations::cdc::*; use crate::protocol::DeltaOperation; use crate::table::state::DeltaTableState; -use crate::{ - delta_datafusion::{ - expr::fmt_expr_to_sql, - logical::MetricObserver, - physical::{find_metric_node, get_metric, MetricObserverExec}, - DataFusionMixins, DeltaColumn, DeltaScanConfigBuilder, DeltaSessionContext, - DeltaTableProvider, - }, - DeltaTableError, -}; -use crate::{DeltaResult, DeltaTable}; +use crate::{DeltaResult, DeltaTable, DeltaTableError}; /// Custom column name used for marking internal [RecordBatch] rows as updated pub(crate) const UPDATE_PREDICATE_COLNAME: &str = "__delta_rs_update_predicate"; diff --git a/crates/core/src/operations/update_field_metadata.rs b/crates/core/src/operations/update_field_metadata.rs index b81382dc24..2062c97960 100644 --- a/crates/core/src/operations/update_field_metadata.rs +++ b/crates/core/src/operations/update_field_metadata.rs @@ -7,8 +7,8 @@ use delta_kernel::schema::{MetadataValue, StructType}; use futures::future::BoxFuture; use itertools::Itertools; -use super::transaction::{CommitBuilder, CommitProperties}; use super::{CustomExecuteHandler, Operation}; +use crate::kernel::transaction::{CommitBuilder, CommitProperties}; use crate::logstore::LogStoreRef; use crate::protocol::DeltaOperation; use crate::table::state::DeltaTableState; diff --git a/crates/core/src/operations/vacuum.rs b/crates/core/src/operations/vacuum.rs index 4e5c46589f..1951ae7f9d 100644 --- a/crates/core/src/operations/vacuum.rs +++ b/crates/core/src/operations/vacuum.rs @@ -32,9 +32,9 @@ use object_store::Error; use object_store::{path::Path, ObjectStore}; use serde::Serialize; -use super::transaction::{CommitBuilder, CommitProperties}; use super::{CustomExecuteHandler, Operation}; use crate::errors::{DeltaResult, DeltaTableError}; +use crate::kernel::transaction::{CommitBuilder, CommitProperties}; use crate::logstore::LogStoreRef; use crate::protocol::DeltaOperation; use crate::table::state::DeltaTableState; diff --git a/crates/core/src/operations/write/mod.rs b/crates/core/src/operations/write/mod.rs index 1b2e1be830..c0efefa023 100644 --- a/crates/core/src/operations/write/mod.rs +++ b/crates/core/src/operations/write/mod.rs @@ -58,7 +58,6 @@ use tracing::log::*; use super::cdc::CDC_COLUMN_NAME; use super::datafusion_utils::Expression; -use super::transaction::{CommitBuilder, CommitProperties, TableReference, PROTOCOL}; use super::{CreateBuilder, CustomExecuteHandler, Operation}; use crate::delta_datafusion::expr::fmt_expr_to_sql; use crate::delta_datafusion::expr::parse_predicate_expression; @@ -68,6 +67,7 @@ use crate::delta_datafusion::planner::DeltaPlanner; use crate::delta_datafusion::register_store; use crate::delta_datafusion::DataFusionMixins; use crate::errors::{DeltaResult, DeltaTableError}; +use crate::kernel::transaction::{CommitBuilder, CommitProperties, TableReference, PROTOCOL}; use crate::kernel::{Action, ActionType, Metadata, StructType, StructTypeExt}; use crate::logstore::LogStoreRef; use crate::operations::cast::merge_schema::merge_arrow_schema; diff --git a/crates/core/src/protocol/checkpoints.rs b/crates/core/src/protocol/checkpoints.rs index e7e3141cd7..34bd8ef5f3 100644 --- a/crates/core/src/protocol/checkpoints.rs +++ b/crates/core/src/protocol/checkpoints.rs @@ -602,8 +602,8 @@ mod tests { use serde_json::json; use super::*; + use crate::kernel::transaction::{CommitBuilder, TableReference}; use crate::kernel::StructType; - use crate::operations::transaction::{CommitBuilder, TableReference}; use crate::operations::DeltaOps; use crate::protocol::Metadata; use crate::writer::test_utils::get_delta_schema; diff --git a/crates/core/src/table/state.rs b/crates/core/src/table/state.rs index 2a25399c42..da2b76fd97 100644 --- a/crates/core/src/table/state.rs +++ b/crates/core/src/table/state.rs @@ -60,7 +60,7 @@ impl DeltaTableState { /// Construct a delta table state object from a list of actions #[cfg(test)] pub fn from_actions(actions: Vec) -> DeltaResult { - use crate::operations::transaction::CommitData; + use crate::kernel::transaction::CommitData; use crate::protocol::{DeltaOperation, SaveMode}; let metadata = actions diff --git a/crates/core/src/test_utils/factories/actions.rs b/crates/core/src/test_utils/factories/actions.rs index bd55b260e1..72da78d772 100644 --- a/crates/core/src/test_utils/factories/actions.rs +++ b/crates/core/src/test_utils/factories/actions.rs @@ -9,8 +9,8 @@ use object_store::ObjectMeta; use super::{get_parquet_bytes, DataFactory, FileStats}; use crate::kernel::arrow::extract::{self as ex}; use crate::kernel::partitions_schema; +use crate::kernel::transaction::PROTOCOL; use crate::kernel::{Add, Metadata, Protocol, Remove, StructType}; -use crate::operations::transaction::PROTOCOL; use delta_kernel::table_features::{ReaderFeature, WriterFeature}; pub struct ActionFactory; diff --git a/crates/core/src/writer/mod.rs b/crates/core/src/writer/mod.rs index cd87459c2f..169a72bdf8 100644 --- a/crates/core/src/writer/mod.rs +++ b/crates/core/src/writer/mod.rs @@ -7,8 +7,8 @@ use parquet::errors::ParquetError; use serde_json::Value; use crate::errors::DeltaTableError; +use crate::kernel::transaction::{CommitBuilder, CommitProperties}; use crate::kernel::{Action, Add}; -use crate::operations::transaction::{CommitBuilder, CommitProperties}; use crate::protocol::{ColumnCountStat, DeltaOperation, SaveMode}; use crate::DeltaTable; diff --git a/crates/core/tests/command_merge.rs b/crates/core/tests/command_merge.rs index 7b4c3aad01..c0afc200e2 100644 --- a/crates/core/tests/command_merge.rs +++ b/crates/core/tests/command_merge.rs @@ -7,9 +7,9 @@ use datafusion::dataframe::DataFrame; use datafusion::prelude::SessionContext; use datafusion_common::Column; use datafusion_expr::{col, lit, Expr}; +use deltalake_core::kernel::transaction::TransactionError; use deltalake_core::kernel::{DataType as DeltaDataType, PrimitiveType, StructField, StructType}; use deltalake_core::operations::merge::MergeMetrics; -use deltalake_core::operations::transaction::TransactionError; use deltalake_core::protocol::SaveMode; use deltalake_core::{open_table, DeltaOps, DeltaResult, DeltaTable, DeltaTableError}; use std::sync::Arc; diff --git a/crates/core/tests/command_optimize.rs b/crates/core/tests/command_optimize.rs index 4826647750..5cd143864a 100644 --- a/crates/core/tests/command_optimize.rs +++ b/crates/core/tests/command_optimize.rs @@ -5,11 +5,11 @@ use arrow_array::{Int32Array, RecordBatch, StringArray}; use arrow_schema::{DataType as ArrowDataType, Field, Schema as ArrowSchema}; use arrow_select::concat::concat_batches; use deltalake_core::errors::DeltaTableError; +use deltalake_core::kernel::transaction::{CommitBuilder, CommitProperties}; use deltalake_core::kernel::{Action, DataType, PrimitiveType, StructField}; use deltalake_core::operations::optimize::{ create_merge_plan, MetricDetails, Metrics, OptimizeType, }; -use deltalake_core::operations::transaction::{CommitBuilder, CommitProperties}; use deltalake_core::operations::DeltaOps; use deltalake_core::protocol::DeltaOperation; use deltalake_core::storage::ObjectStoreRef; diff --git a/crates/core/tests/commit_info_format.rs b/crates/core/tests/commit_info_format.rs index df817365b3..1ba00cc9d3 100644 --- a/crates/core/tests/commit_info_format.rs +++ b/crates/core/tests/commit_info_format.rs @@ -1,8 +1,8 @@ #![allow(dead_code)] mod fs_common; +use deltalake_core::kernel::transaction::CommitBuilder; use deltalake_core::kernel::Action; -use deltalake_core::operations::transaction::CommitBuilder; use deltalake_core::protocol::{DeltaOperation, SaveMode}; use serde_json::json; use std::error::Error; diff --git a/crates/core/tests/fs_common/mod.rs b/crates/core/tests/fs_common/mod.rs index 061c0d8c58..ebc7cc71bc 100644 --- a/crates/core/tests/fs_common/mod.rs +++ b/crates/core/tests/fs_common/mod.rs @@ -1,9 +1,9 @@ use chrono::Utc; +use deltalake_core::kernel::transaction::CommitBuilder; use deltalake_core::kernel::{ Action, Add, DataType, PrimitiveType, Remove, StructField, StructType, }; use deltalake_core::operations::create::CreateBuilder; -use deltalake_core::operations::transaction::CommitBuilder; use deltalake_core::protocol::{DeltaOperation, SaveMode}; use deltalake_core::storage::{GetResult, ObjectStoreResult, StorageOptions}; use deltalake_core::DeltaTable; diff --git a/crates/lakefs/src/client.rs b/crates/lakefs/src/client.rs index 9647d086c8..548ca44f37 100644 --- a/crates/lakefs/src/client.rs +++ b/crates/lakefs/src/client.rs @@ -1,5 +1,5 @@ use dashmap::DashMap; -use deltalake_core::operations::transaction::TransactionError; +use deltalake_core::kernel::transaction::TransactionError; use deltalake_core::DeltaResult; use reqwest::Client; use reqwest::StatusCode; diff --git a/crates/lakefs/src/errors.rs b/crates/lakefs/src/errors.rs index f48c9d173a..ff994f8c1a 100644 --- a/crates/lakefs/src/errors.rs +++ b/crates/lakefs/src/errors.rs @@ -1,6 +1,6 @@ //! Errors for LakeFS log store -use deltalake_core::operations::transaction::TransactionError; +use deltalake_core::kernel::transaction::TransactionError; use deltalake_core::DeltaTableError; use reqwest::Error; diff --git a/crates/lakefs/src/logstore.rs b/crates/lakefs/src/logstore.rs index 11be99387a..dc494d83db 100644 --- a/crates/lakefs/src/logstore.rs +++ b/crates/lakefs/src/logstore.rs @@ -11,12 +11,12 @@ use deltalake_core::storage::{ commit_uri_from_version, DefaultObjectStoreRegistry, ObjectStoreRegistry, }; use deltalake_core::storage::{url_prefix_handler, DeltaIOStorageBackend, IORuntime}; -use deltalake_core::{logstore::*, DeltaTableError, Path}; use deltalake_core::{ - operations::transaction::TransactionError, + kernel::transaction::TransactionError, storage::{ObjectStoreRef, StorageOptions}, DeltaResult, }; +use deltalake_core::{logstore::*, DeltaTableError, Path}; use object_store::{Attributes, Error as ObjectStoreError, ObjectStore, PutOptions, TagSet}; use tracing::debug; use url::Url; diff --git a/crates/test/src/concurrent.rs b/crates/test/src/concurrent.rs index aed4576925..e06e752270 100644 --- a/crates/test/src/concurrent.rs +++ b/crates/test/src/concurrent.rs @@ -3,8 +3,8 @@ use std::future::Future; use std::iter::FromIterator; use std::time::Duration; +use deltalake_core::kernel::transaction::CommitBuilder; use deltalake_core::kernel::{Action, Add, DataType, PrimitiveType, StructField, StructType}; -use deltalake_core::operations::transaction::CommitBuilder; use deltalake_core::operations::DeltaOps; use deltalake_core::protocol::{DeltaOperation, SaveMode}; use deltalake_core::{DeltaTable, DeltaTableBuilder}; diff --git a/crates/test/src/lib.rs b/crates/test/src/lib.rs index dd8c2a2951..6930f6a718 100644 --- a/crates/test/src/lib.rs +++ b/crates/test/src/lib.rs @@ -4,10 +4,10 @@ use std::collections::HashMap; use std::sync::Arc; use bytes::Bytes; +use deltalake_core::kernel::transaction::CommitBuilder; use deltalake_core::kernel::{Action, Add, Remove, StructType}; use deltalake_core::logstore::LogStore; use deltalake_core::operations::create::CreateBuilder; -use deltalake_core::operations::transaction::CommitBuilder; use deltalake_core::protocol::{DeltaOperation, SaveMode}; use deltalake_core::DeltaTable; use deltalake_core::DeltaTableBuilder; diff --git a/python/src/lib.rs b/python/src/lib.rs index bd2aba0fc7..6295cd3189 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -31,6 +31,7 @@ use deltalake::datafusion::catalog::TableProvider; use deltalake::datafusion::prelude::SessionContext; use deltalake::delta_datafusion::{DeltaCdfTableProvider, DeltaDataChecker}; use deltalake::errors::DeltaTableError; +use deltalake::kernel::transaction::{CommitBuilder, CommitProperties, TableReference, PROTOCOL}; use deltalake::kernel::{ scalars::ScalarExt, Action, Add, Invariant, LogicalFile, Remove, StructType, Transaction, }; @@ -47,9 +48,6 @@ use deltalake::operations::load_cdf::CdfLoadBuilder; use deltalake::operations::optimize::{OptimizeBuilder, OptimizeType}; use deltalake::operations::restore::RestoreBuilder; use deltalake::operations::set_tbl_properties::SetTablePropertiesBuilder; -use deltalake::operations::transaction::{ - CommitBuilder, CommitProperties, TableReference, PROTOCOL, -}; use deltalake::operations::update::UpdateBuilder; use deltalake::operations::vacuum::VacuumBuilder; use deltalake::operations::write::WriteBuilder; From dcc60e5c9f858a2ad49748076492e8a8b91d771c Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Fri, 11 Apr 2025 21:59:37 +0200 Subject: [PATCH 23/23] refactor: move transaction module to kernel Signed-off-by: Robert Pack --- crates/aws/src/logstore/default_logstore.rs | 2 +- crates/aws/src/logstore/dynamodb_logstore.rs | 2 +- crates/aws/tests/integration_s3_dynamodb.rs | 2 +- crates/core/src/errors.rs | 2 +- crates/core/src/kernel/mod.rs | 1 + .../core/src/kernel/snapshot/log_segment.rs | 4 +-- crates/core/src/kernel/snapshot/mod.rs | 3 +-- crates/core/src/kernel/snapshot/replay.rs | 2 +- .../transaction/application.rs | 2 +- .../transaction/conflict_checker.rs | 0 .../{operations => kernel}/transaction/mod.rs | 11 ++++---- .../transaction/protocol.rs | 3 ++- .../transaction/state.rs | 0 crates/core/src/logstore/default_logstore.rs | 8 +++--- crates/core/src/logstore/mod.rs | 2 +- crates/core/src/operations/add_column.rs | 2 +- crates/core/src/operations/add_feature.rs | 2 +- crates/core/src/operations/constraints.rs | 9 +++---- .../core/src/operations/convert_to_delta.rs | 5 ++-- crates/core/src/operations/create.rs | 2 +- crates/core/src/operations/delete.rs | 2 +- .../core/src/operations/drop_constraints.rs | 2 +- .../core/src/operations/filesystem_check.rs | 3 +-- crates/core/src/operations/load.rs | 2 +- crates/core/src/operations/merge/mod.rs | 5 +--- crates/core/src/operations/mod.rs | 1 - crates/core/src/operations/optimize.rs | 8 +++--- crates/core/src/operations/restore.rs | 5 ++-- .../core/src/operations/set_tbl_properties.rs | 2 +- crates/core/src/operations/update.rs | 26 +++++++------------ .../src/operations/update_field_metadata.rs | 2 +- crates/core/src/operations/vacuum.rs | 2 +- crates/core/src/operations/write/mod.rs | 2 +- crates/core/src/protocol/checkpoints.rs | 2 +- crates/core/src/table/state.rs | 2 +- .../core/src/test_utils/factories/actions.rs | 2 +- crates/core/src/writer/mod.rs | 2 +- crates/core/tests/command_merge.rs | 2 +- crates/core/tests/command_optimize.rs | 2 +- crates/core/tests/commit_info_format.rs | 2 +- crates/core/tests/fs_common/mod.rs | 2 +- crates/lakefs/src/client.rs | 2 +- crates/lakefs/src/errors.rs | 2 +- crates/lakefs/src/logstore.rs | 4 +-- crates/test/src/concurrent.rs | 2 +- crates/test/src/lib.rs | 2 +- python/src/lib.rs | 4 +-- 47 files changed, 70 insertions(+), 88 deletions(-) rename crates/core/src/{operations => kernel}/transaction/application.rs (97%) rename crates/core/src/{operations => kernel}/transaction/conflict_checker.rs (100%) rename crates/core/src/{operations => kernel}/transaction/mod.rs (99%) rename crates/core/src/{operations => kernel}/transaction/protocol.rs (99%) rename crates/core/src/{operations => kernel}/transaction/state.rs (100%) diff --git a/crates/aws/src/logstore/default_logstore.rs b/crates/aws/src/logstore/default_logstore.rs index 7677010f15..0a73bd33b7 100644 --- a/crates/aws/src/logstore/default_logstore.rs +++ b/crates/aws/src/logstore/default_logstore.rs @@ -5,7 +5,7 @@ use std::sync::Arc; use bytes::Bytes; use deltalake_core::logstore::*; use deltalake_core::{ - operations::transaction::TransactionError, + kernel::transaction::TransactionError, storage::{ObjectStoreRef, StorageOptions}, DeltaResult, }; diff --git a/crates/aws/src/logstore/dynamodb_logstore.rs b/crates/aws/src/logstore/dynamodb_logstore.rs index f5680e091c..69e282f282 100644 --- a/crates/aws/src/logstore/dynamodb_logstore.rs +++ b/crates/aws/src/logstore/dynamodb_logstore.rs @@ -14,7 +14,7 @@ use url::Url; use deltalake_core::logstore::*; use deltalake_core::{ - operations::transaction::TransactionError, + kernel::transaction::TransactionError, storage::{ObjectStoreRef, StorageOptions}, DeltaResult, DeltaTableError, }; diff --git a/crates/aws/tests/integration_s3_dynamodb.rs b/crates/aws/tests/integration_s3_dynamodb.rs index 1585c912c0..6432b22d3f 100644 --- a/crates/aws/tests/integration_s3_dynamodb.rs +++ b/crates/aws/tests/integration_s3_dynamodb.rs @@ -10,10 +10,10 @@ use aws_sdk_dynamodb::types::BillingMode; use deltalake_aws::logstore::{RepairLogEntryResult, S3DynamoDbLogStore}; use deltalake_aws::storage::S3StorageOptions; use deltalake_aws::{CommitEntry, DynamoDbConfig, DynamoDbLockClient}; +use deltalake_core::kernel::transaction::CommitBuilder; use deltalake_core::kernel::{Action, Add, DataType, PrimitiveType, StructField, StructType}; use deltalake_core::logstore::{logstore_for, CommitOrBytes, LogStore}; use deltalake_core::operations::create::CreateBuilder; -use deltalake_core::operations::transaction::CommitBuilder; use deltalake_core::protocol::{DeltaOperation, SaveMode}; use deltalake_core::storage::commit_uri_from_version; use deltalake_core::storage::StorageOptions; diff --git a/crates/core/src/errors.rs b/crates/core/src/errors.rs index 9980cf23ad..c97d753408 100644 --- a/crates/core/src/errors.rs +++ b/crates/core/src/errors.rs @@ -2,7 +2,7 @@ use chrono::{DateTime, Utc}; use object_store::Error as ObjectStoreError; -use crate::operations::transaction::{CommitBuilderError, TransactionError}; +use crate::kernel::transaction::{CommitBuilderError, TransactionError}; use crate::protocol::ProtocolError; /// A result returned by delta-rs diff --git a/crates/core/src/kernel/mod.rs b/crates/core/src/kernel/mod.rs index efac04489d..6a9ba71c94 100644 --- a/crates/core/src/kernel/mod.rs +++ b/crates/core/src/kernel/mod.rs @@ -10,6 +10,7 @@ pub mod error; pub mod models; pub mod scalars; mod snapshot; +pub mod transaction; pub use error::*; pub use models::*; diff --git a/crates/core/src/kernel/snapshot/log_segment.rs b/crates/core/src/kernel/snapshot/log_segment.rs index 015317a3ac..8f663bb41f 100644 --- a/crates/core/src/kernel/snapshot/log_segment.rs +++ b/crates/core/src/kernel/snapshot/log_segment.rs @@ -16,9 +16,9 @@ use serde::{Deserialize, Serialize}; use tracing::debug; use super::parse; +use crate::kernel::transaction::CommitData; use crate::kernel::{arrow::json, ActionType, Metadata, Protocol, Schema, StructType}; use crate::logstore::LogStore; -use crate::operations::transaction::CommitData; use crate::{DeltaResult, DeltaTableConfig, DeltaTableError}; const LAST_CHECKPOINT_FILE_NAME: &str = "_last_checkpoint"; @@ -584,8 +584,8 @@ pub(super) mod tests { use crate::{ checkpoints::{create_checkpoint_for, create_checkpoint_from_table_uri_and_cleanup}, + kernel::transaction::{CommitBuilder, TableReference}, kernel::{Action, Add, Format, Remove}, - operations::transaction::{CommitBuilder, TableReference}, protocol::{DeltaOperation, SaveMode}, DeltaTableBuilder, }; diff --git a/crates/core/src/kernel/snapshot/mod.rs b/crates/core/src/kernel/snapshot/mod.rs index 36c4cd221a..24ca090254 100644 --- a/crates/core/src/kernel/snapshot/mod.rs +++ b/crates/core/src/kernel/snapshot/mod.rs @@ -36,10 +36,9 @@ use super::{ Transaction, }; use crate::kernel::parse::read_cdf_adds; +use crate::kernel::transaction::{CommitData, PROTOCOL}; use crate::kernel::{ActionType, StructType}; use crate::logstore::LogStore; -use crate::operations::transaction::CommitData; -use crate::operations::transaction::PROTOCOL; use crate::table::config::TableConfig; use crate::{DeltaResult, DeltaTableConfig, DeltaTableError}; diff --git a/crates/core/src/kernel/snapshot/replay.rs b/crates/core/src/kernel/snapshot/replay.rs index e7b4149067..cfe812b2f2 100644 --- a/crates/core/src/kernel/snapshot/replay.rs +++ b/crates/core/src/kernel/snapshot/replay.rs @@ -609,8 +609,8 @@ pub(super) mod tests { use super::super::{log_segment::LogSegment, partitions_schema, stats_schema}; use super::*; + use crate::kernel::transaction::CommitData; use crate::kernel::{models::ActionType, StructType}; - use crate::operations::transaction::CommitData; use crate::protocol::DeltaOperation; use crate::table::config::TableConfig; use crate::test_utils::{ActionFactory, TestResult, TestSchemas}; diff --git a/crates/core/src/operations/transaction/application.rs b/crates/core/src/kernel/transaction/application.rs similarity index 97% rename from crates/core/src/operations/transaction/application.rs rename to crates/core/src/kernel/transaction/application.rs index c2f08e8838..1ae9b9d1c8 100644 --- a/crates/core/src/operations/transaction/application.rs +++ b/crates/core/src/kernel/transaction/application.rs @@ -1,7 +1,7 @@ #[cfg(test)] mod tests { use crate::{ - checkpoints, kernel::Transaction, operations::transaction::CommitProperties, + checkpoints, kernel::transaction::CommitProperties, kernel::Transaction, protocol::SaveMode, writer::test_utils::get_record_batch, DeltaOps, DeltaTableBuilder, }; diff --git a/crates/core/src/operations/transaction/conflict_checker.rs b/crates/core/src/kernel/transaction/conflict_checker.rs similarity index 100% rename from crates/core/src/operations/transaction/conflict_checker.rs rename to crates/core/src/kernel/transaction/conflict_checker.rs diff --git a/crates/core/src/operations/transaction/mod.rs b/crates/core/src/kernel/transaction/mod.rs similarity index 99% rename from crates/core/src/operations/transaction/mod.rs rename to crates/core/src/kernel/transaction/mod.rs index 3f055afe06..1908d1dcda 100644 --- a/crates/core/src/operations/transaction/mod.rs +++ b/crates/core/src/kernel/transaction/mod.rs @@ -86,22 +86,23 @@ use serde_json::Value; use tracing::*; use uuid::Uuid; -pub use self::conflict_checker::CommitConflictError; +use delta_kernel::table_features::{ReaderFeature, WriterFeature}; +use serde::{Deserialize, Serialize}; + use self::conflict_checker::{TransactionInfo, WinningCommitSummary}; -pub use self::protocol::INSTANCE as PROTOCOL; use crate::checkpoints::{cleanup_expired_logs_for, create_checkpoint_for}; use crate::errors::DeltaTableError; use crate::kernel::{Action, CommitInfo, EagerSnapshot, Metadata, Protocol, Transaction}; use crate::logstore::{CommitOrBytes, LogStoreRef}; +use crate::operations::CustomExecuteHandler; use crate::protocol::DeltaOperation; use crate::storage::ObjectStoreRef; use crate::table::config::TableConfig; use crate::table::state::DeltaTableState; use crate::{crate_version, DeltaResult}; -use delta_kernel::table_features::{ReaderFeature, WriterFeature}; -use serde::{Deserialize, Serialize}; -use super::CustomExecuteHandler; +pub use self::conflict_checker::CommitConflictError; +pub use self::protocol::INSTANCE as PROTOCOL; #[cfg(test)] pub(crate) mod application; diff --git a/crates/core/src/operations/transaction/protocol.rs b/crates/core/src/kernel/transaction/protocol.rs similarity index 99% rename from crates/core/src/operations/transaction/protocol.rs rename to crates/core/src/kernel/transaction/protocol.rs index 495638b3c0..f5e2f5fbf0 100644 --- a/crates/core/src/operations/transaction/protocol.rs +++ b/crates/core/src/kernel/transaction/protocol.rs @@ -1,11 +1,12 @@ use std::collections::HashSet; use std::sync::LazyLock; +use delta_kernel::table_features::{ReaderFeature, WriterFeature}; + use super::{TableReference, TransactionError}; use crate::kernel::{contains_timestampntz, Action, EagerSnapshot, Protocol, Schema}; use crate::protocol::DeltaOperation; use crate::table::state::DeltaTableState; -use delta_kernel::table_features::{ReaderFeature, WriterFeature}; static READER_V2: LazyLock> = LazyLock::new(|| HashSet::from_iter([ReaderFeature::ColumnMapping])); diff --git a/crates/core/src/operations/transaction/state.rs b/crates/core/src/kernel/transaction/state.rs similarity index 100% rename from crates/core/src/operations/transaction/state.rs rename to crates/core/src/kernel/transaction/state.rs diff --git a/crates/core/src/logstore/default_logstore.rs b/crates/core/src/logstore/default_logstore.rs index e5c3a54e53..f9dad9cccc 100644 --- a/crates/core/src/logstore/default_logstore.rs +++ b/crates/core/src/logstore/default_logstore.rs @@ -7,11 +7,9 @@ use object_store::{Attributes, Error as ObjectStoreError, ObjectStore, PutOption use uuid::Uuid; use super::{CommitOrBytes, LogStore, LogStoreConfig}; -use crate::{ - operations::transaction::TransactionError, - storage::{commit_uri_from_version, ObjectStoreRef}, - DeltaResult, -}; +use crate::kernel::transaction::TransactionError; +use crate::storage::{commit_uri_from_version, ObjectStoreRef}; +use crate::DeltaResult; fn put_options() -> &'static PutOptions { static PUT_OPTS: OnceLock = OnceLock::new(); diff --git a/crates/core/src/logstore/mod.rs b/crates/core/src/logstore/mod.rs index 6a1e826a23..f037d9784f 100644 --- a/crates/core/src/logstore/mod.rs +++ b/crates/core/src/logstore/mod.rs @@ -18,8 +18,8 @@ use url::Url; use uuid::Uuid; use crate::kernel::log_segment::PathExt; +use crate::kernel::transaction::TransactionError; use crate::kernel::Action; -use crate::operations::transaction::TransactionError; use crate::protocol::{get_last_checkpoint, ProtocolError}; use crate::storage::DeltaIOStorageBackend; use crate::storage::{ diff --git a/crates/core/src/operations/add_column.rs b/crates/core/src/operations/add_column.rs index e6646edb9c..75063963ba 100644 --- a/crates/core/src/operations/add_column.rs +++ b/crates/core/src/operations/add_column.rs @@ -6,8 +6,8 @@ use delta_kernel::schema::StructType; use futures::future::BoxFuture; use itertools::Itertools; -use super::transaction::{CommitBuilder, CommitProperties}; use super::{CustomExecuteHandler, Operation}; +use crate::kernel::transaction::{CommitBuilder, CommitProperties}; use crate::kernel::{StructField, StructTypeExt}; use crate::logstore::LogStoreRef; use crate::operations::cast::merge_schema::merge_delta_struct; diff --git a/crates/core/src/operations/add_feature.rs b/crates/core/src/operations/add_feature.rs index 97777e6b10..ddc3a0b531 100644 --- a/crates/core/src/operations/add_feature.rs +++ b/crates/core/src/operations/add_feature.rs @@ -6,8 +6,8 @@ use delta_kernel::table_features::{ReaderFeature, WriterFeature}; use futures::future::BoxFuture; use itertools::Itertools; -use super::transaction::{CommitBuilder, CommitProperties}; use super::{CustomExecuteHandler, Operation}; +use crate::kernel::transaction::{CommitBuilder, CommitProperties}; use crate::kernel::TableFeatures; use crate::logstore::LogStoreRef; use crate::protocol::DeltaOperation; diff --git a/crates/core/src/operations/constraints.rs b/crates/core/src/operations/constraints.rs index 97ea29c38e..be71314faf 100644 --- a/crates/core/src/operations/constraints.rs +++ b/crates/core/src/operations/constraints.rs @@ -7,13 +7,17 @@ use datafusion::execution::{SendableRecordBatchStream, TaskContext}; use datafusion::prelude::SessionContext; use datafusion_common::ToDFSchema; use datafusion_physical_plan::ExecutionPlan; +use delta_kernel::table_features::WriterFeature; use futures::future::BoxFuture; use futures::StreamExt; +use super::datafusion_utils::into_expr; +use super::{CustomExecuteHandler, Operation}; use crate::delta_datafusion::expr::fmt_expr_to_sql; use crate::delta_datafusion::{ register_store, DeltaDataChecker, DeltaScanBuilder, DeltaSessionContext, }; +use crate::kernel::transaction::{CommitBuilder, CommitProperties}; use crate::kernel::Protocol; use crate::logstore::LogStoreRef; use crate::operations::datafusion_utils::Expression; @@ -21,11 +25,6 @@ use crate::protocol::DeltaOperation; use crate::table::state::DeltaTableState; use crate::table::Constraint; use crate::{DeltaResult, DeltaTable, DeltaTableError}; -use delta_kernel::table_features::WriterFeature; - -use super::datafusion_utils::into_expr; -use super::transaction::{CommitBuilder, CommitProperties}; -use super::{CustomExecuteHandler, Operation}; /// Build a constraint to add to a table pub struct ConstraintBuilder { diff --git a/crates/core/src/operations/convert_to_delta.rs b/crates/core/src/operations/convert_to_delta.rs index f4c563fbc5..e6f72d25fe 100644 --- a/crates/core/src/operations/convert_to_delta.rs +++ b/crates/core/src/operations/convert_to_delta.rs @@ -16,6 +16,8 @@ use percent_encoding::percent_decode_str; use tracing::debug; use uuid::Uuid; +use super::{CustomExecuteHandler, Operation}; +use crate::kernel::transaction::CommitProperties; use crate::operations::get_num_idx_cols_and_stats_columns; use crate::{ kernel::{scalars::ScalarExt, Add, DataType, Schema, StructField}, @@ -28,9 +30,6 @@ use crate::{ DeltaResult, DeltaTable, DeltaTableError, ObjectStoreError, NULL_PARTITION_VALUE_DATA_PATH, }; -use super::transaction::CommitProperties; -use super::{CustomExecuteHandler, Operation}; - /// Error converting a Parquet table to a Delta table #[derive(Debug, thiserror::Error)] enum Error { diff --git a/crates/core/src/operations/create.rs b/crates/core/src/operations/create.rs index 090b116637..3a8bbb2bc1 100644 --- a/crates/core/src/operations/create.rs +++ b/crates/core/src/operations/create.rs @@ -10,9 +10,9 @@ use serde_json::Value; use tracing::log::*; use uuid::Uuid; -use super::transaction::{CommitBuilder, CommitProperties, TableReference, PROTOCOL}; use super::{CustomExecuteHandler, Operation}; use crate::errors::{DeltaResult, DeltaTableError}; +use crate::kernel::transaction::{CommitBuilder, CommitProperties, TableReference, PROTOCOL}; use crate::kernel::{Action, DataType, Metadata, Protocol, StructField, StructType}; use crate::logstore::{LogStore, LogStoreRef}; use crate::protocol::{DeltaOperation, SaveMode}; diff --git a/crates/core/src/operations/delete.rs b/crates/core/src/operations/delete.rs index c6d943e1c0..34ffde8a6c 100644 --- a/crates/core/src/operations/delete.rs +++ b/crates/core/src/operations/delete.rs @@ -40,7 +40,6 @@ use serde::Serialize; use super::cdc::should_write_cdc; use super::datafusion_utils::Expression; -use super::transaction::{CommitBuilder, CommitProperties, PROTOCOL}; use super::Operation; use crate::delta_datafusion::expr::fmt_expr_to_sql; use crate::delta_datafusion::logical::MetricObserver; @@ -51,6 +50,7 @@ use crate::delta_datafusion::{ DeltaTableProvider, }; use crate::errors::DeltaResult; +use crate::kernel::transaction::{CommitBuilder, CommitProperties, PROTOCOL}; use crate::kernel::{Action, Add, Remove}; use crate::logstore::LogStoreRef; use crate::operations::write::execution::{write_execution_plan, write_execution_plan_cdc}; diff --git a/crates/core/src/operations/drop_constraints.rs b/crates/core/src/operations/drop_constraints.rs index 6743c3415a..2c362913d6 100644 --- a/crates/core/src/operations/drop_constraints.rs +++ b/crates/core/src/operations/drop_constraints.rs @@ -4,8 +4,8 @@ use std::sync::Arc; use futures::future::BoxFuture; -use super::transaction::{CommitBuilder, CommitProperties}; use super::{CustomExecuteHandler, Operation}; +use crate::kernel::transaction::{CommitBuilder, CommitProperties}; use crate::kernel::Action; use crate::logstore::LogStoreRef; use crate::protocol::DeltaOperation; diff --git a/crates/core/src/operations/filesystem_check.rs b/crates/core/src/operations/filesystem_check.rs index 32591f2812..e6474b285d 100644 --- a/crates/core/src/operations/filesystem_check.rs +++ b/crates/core/src/operations/filesystem_check.rs @@ -20,16 +20,15 @@ use std::time::UNIX_EPOCH; use futures::future::BoxFuture; use futures::StreamExt; -pub use object_store::path::Path; use object_store::ObjectStore; use serde::{de::Error as DeError, Deserialize, Deserializer, Serialize, Serializer}; use url::{ParseError, Url}; use uuid::Uuid; -use super::transaction::{CommitBuilder, CommitProperties}; use super::CustomExecuteHandler; use super::Operation; use crate::errors::{DeltaResult, DeltaTableError}; +use crate::kernel::transaction::{CommitBuilder, CommitProperties}; use crate::kernel::{Action, Add, Remove}; use crate::logstore::LogStoreRef; use crate::protocol::DeltaOperation; diff --git a/crates/core/src/operations/load.rs b/crates/core/src/operations/load.rs index cf7f0c93ca..c52b19f03b 100644 --- a/crates/core/src/operations/load.rs +++ b/crates/core/src/operations/load.rs @@ -6,10 +6,10 @@ use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec; use datafusion::physical_plan::{ExecutionPlan, SendableRecordBatchStream}; use futures::future::BoxFuture; -use super::transaction::PROTOCOL; use super::CustomExecuteHandler; use crate::delta_datafusion::DataFusionMixins; use crate::errors::{DeltaResult, DeltaTableError}; +use crate::kernel::transaction::PROTOCOL; use crate::logstore::LogStoreRef; use crate::table::state::DeltaTableState; use crate::DeltaTable; diff --git a/crates/core/src/operations/merge/mod.rs b/crates/core/src/operations/merge/mod.rs index 782b070856..d14b60c6f3 100644 --- a/crates/core/src/operations/merge/mod.rs +++ b/crates/core/src/operations/merge/mod.rs @@ -66,9 +66,7 @@ use tracing::log::*; use uuid::Uuid; use self::barrier::{MergeBarrier, MergeBarrierExec}; - use super::datafusion_utils::{into_expr, maybe_into_expr, Expression}; -use super::transaction::{CommitProperties, PROTOCOL}; use super::{CustomExecuteHandler, Operation}; use crate::delta_datafusion::expr::{fmt_expr_to_sql, parse_predicate_expression}; use crate::delta_datafusion::logical::MetricObserver; @@ -78,13 +76,12 @@ use crate::delta_datafusion::{ register_store, DataFusionMixins, DeltaColumn, DeltaScan, DeltaScanConfigBuilder, DeltaSessionConfig, DeltaTableProvider, }; - +use crate::kernel::transaction::{CommitBuilder, CommitProperties, PROTOCOL}; use crate::kernel::{Action, Metadata, StructTypeExt}; use crate::logstore::LogStoreRef; use crate::operations::cast::merge_schema::{merge_arrow_field, merge_arrow_schema}; use crate::operations::cdc::*; use crate::operations::merge::barrier::find_node; -use crate::operations::transaction::CommitBuilder; use crate::operations::write::execution::write_execution_plan_v2; use crate::operations::write::generated_columns::{ add_generated_columns, add_missing_generated_columns, diff --git a/crates/core/src/operations/mod.rs b/crates/core/src/operations/mod.rs index 6f541395bf..f17f630d98 100644 --- a/crates/core/src/operations/mod.rs +++ b/crates/core/src/operations/mod.rs @@ -45,7 +45,6 @@ pub mod create; pub mod drop_constraints; pub mod filesystem_check; pub mod restore; -pub mod transaction; pub mod update_field_metadata; pub mod vacuum; diff --git a/crates/core/src/operations/optimize.rs b/crates/core/src/operations/optimize.rs index 503e09ffcb..ba0aa92d9c 100644 --- a/crates/core/src/operations/optimize.rs +++ b/crates/core/src/operations/optimize.rs @@ -42,21 +42,19 @@ use serde::{de::Error as DeError, Deserialize, Deserializer, Serialize, Serializ use tracing::*; use uuid::Uuid; -use super::transaction::PROTOCOL; use super::write::writer::{PartitionWriter, PartitionWriterConfig}; use super::{CustomExecuteHandler, Operation}; +use crate::delta_datafusion::DeltaTableProvider; use crate::errors::{DeltaResult, DeltaTableError}; -use crate::kernel::Add; -use crate::kernel::{scalars::ScalarExt, Action, PartitionsExt, Remove}; +use crate::kernel::transaction::{CommitBuilder, CommitProperties, DEFAULT_RETRIES, PROTOCOL}; +use crate::kernel::{scalars::ScalarExt, Action, Add, PartitionsExt, Remove}; use crate::logstore::LogStoreRef; -use crate::operations::transaction::{CommitBuilder, CommitProperties, DEFAULT_RETRIES}; use crate::protocol::DeltaOperation; use crate::storage::ObjectStoreRef; use crate::table::state::DeltaTableState; use crate::writer::utils::arrow_schema_without_partitions; use crate::{crate_version, DeltaTable, ObjectMeta, PartitionFilter}; -use crate::delta_datafusion::DeltaTableProvider; /// Metrics from Optimize #[derive(Default, Debug, PartialEq, Clone, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] diff --git a/crates/core/src/operations/restore.rs b/crates/core/src/operations/restore.rs index e452110927..ae7d0f2b04 100644 --- a/crates/core/src/operations/restore.rs +++ b/crates/core/src/operations/restore.rs @@ -33,15 +33,14 @@ use object_store::ObjectStore; use serde::Serialize; use uuid::Uuid; +use super::{CustomExecuteHandler, Operation}; +use crate::kernel::transaction::{CommitBuilder, CommitProperties, TransactionError}; use crate::kernel::{Action, Add, Protocol, Remove}; use crate::logstore::LogStoreRef; use crate::protocol::DeltaOperation; use crate::table::state::DeltaTableState; use crate::{DeltaResult, DeltaTable, DeltaTableConfig, DeltaTableError, ObjectStoreError}; -use super::transaction::{CommitBuilder, CommitProperties, TransactionError}; -use super::{CustomExecuteHandler, Operation}; - /// Errors that can occur during restore #[derive(thiserror::Error, Debug)] enum RestoreError { diff --git a/crates/core/src/operations/set_tbl_properties.rs b/crates/core/src/operations/set_tbl_properties.rs index 3e17f8c07c..ccbf120159 100644 --- a/crates/core/src/operations/set_tbl_properties.rs +++ b/crates/core/src/operations/set_tbl_properties.rs @@ -5,8 +5,8 @@ use std::sync::Arc; use futures::future::BoxFuture; -use super::transaction::{CommitBuilder, CommitProperties}; use super::{CustomExecuteHandler, Operation}; +use crate::kernel::transaction::{CommitBuilder, CommitProperties}; use crate::kernel::Action; use crate::logstore::LogStoreRef; use crate::protocol::DeltaOperation; diff --git a/crates/core/src/operations/update.rs b/crates/core/src/operations/update.rs index 7433ef57b2..aaf3173966 100644 --- a/crates/core/src/operations/update.rs +++ b/crates/core/src/operations/update.rs @@ -45,32 +45,26 @@ use serde::Serialize; use tracing::log::*; use uuid::Uuid; -use super::{ - datafusion_utils::Expression, - transaction::{CommitBuilder, CommitProperties}, -}; -use super::{transaction::PROTOCOL, write::WriterStatsConfig}; +use super::datafusion_utils::Expression; +use super::write::WriterStatsConfig; use super::{ write::execution::{write_execution_plan, write_execution_plan_cdc}, CustomExecuteHandler, Operation, }; +use crate::delta_datafusion::{ + expr::fmt_expr_to_sql, + logical::MetricObserver, + physical::{find_metric_node, get_metric, MetricObserverExec}, + DataFusionMixins, DeltaColumn, DeltaScanConfigBuilder, DeltaSessionContext, DeltaTableProvider, +}; use crate::delta_datafusion::{find_files, planner::DeltaPlanner, register_store}; +use crate::kernel::transaction::{CommitBuilder, CommitProperties, PROTOCOL}; use crate::kernel::{Action, Remove}; use crate::logstore::LogStoreRef; use crate::operations::cdc::*; use crate::protocol::DeltaOperation; use crate::table::state::DeltaTableState; -use crate::{ - delta_datafusion::{ - expr::fmt_expr_to_sql, - logical::MetricObserver, - physical::{find_metric_node, get_metric, MetricObserverExec}, - DataFusionMixins, DeltaColumn, DeltaScanConfigBuilder, DeltaSessionContext, - DeltaTableProvider, - }, - DeltaTableError, -}; -use crate::{DeltaResult, DeltaTable}; +use crate::{DeltaResult, DeltaTable, DeltaTableError}; /// Custom column name used for marking internal [RecordBatch] rows as updated pub(crate) const UPDATE_PREDICATE_COLNAME: &str = "__delta_rs_update_predicate"; diff --git a/crates/core/src/operations/update_field_metadata.rs b/crates/core/src/operations/update_field_metadata.rs index b81382dc24..2062c97960 100644 --- a/crates/core/src/operations/update_field_metadata.rs +++ b/crates/core/src/operations/update_field_metadata.rs @@ -7,8 +7,8 @@ use delta_kernel::schema::{MetadataValue, StructType}; use futures::future::BoxFuture; use itertools::Itertools; -use super::transaction::{CommitBuilder, CommitProperties}; use super::{CustomExecuteHandler, Operation}; +use crate::kernel::transaction::{CommitBuilder, CommitProperties}; use crate::logstore::LogStoreRef; use crate::protocol::DeltaOperation; use crate::table::state::DeltaTableState; diff --git a/crates/core/src/operations/vacuum.rs b/crates/core/src/operations/vacuum.rs index 4e5c46589f..1951ae7f9d 100644 --- a/crates/core/src/operations/vacuum.rs +++ b/crates/core/src/operations/vacuum.rs @@ -32,9 +32,9 @@ use object_store::Error; use object_store::{path::Path, ObjectStore}; use serde::Serialize; -use super::transaction::{CommitBuilder, CommitProperties}; use super::{CustomExecuteHandler, Operation}; use crate::errors::{DeltaResult, DeltaTableError}; +use crate::kernel::transaction::{CommitBuilder, CommitProperties}; use crate::logstore::LogStoreRef; use crate::protocol::DeltaOperation; use crate::table::state::DeltaTableState; diff --git a/crates/core/src/operations/write/mod.rs b/crates/core/src/operations/write/mod.rs index 1b2e1be830..c0efefa023 100644 --- a/crates/core/src/operations/write/mod.rs +++ b/crates/core/src/operations/write/mod.rs @@ -58,7 +58,6 @@ use tracing::log::*; use super::cdc::CDC_COLUMN_NAME; use super::datafusion_utils::Expression; -use super::transaction::{CommitBuilder, CommitProperties, TableReference, PROTOCOL}; use super::{CreateBuilder, CustomExecuteHandler, Operation}; use crate::delta_datafusion::expr::fmt_expr_to_sql; use crate::delta_datafusion::expr::parse_predicate_expression; @@ -68,6 +67,7 @@ use crate::delta_datafusion::planner::DeltaPlanner; use crate::delta_datafusion::register_store; use crate::delta_datafusion::DataFusionMixins; use crate::errors::{DeltaResult, DeltaTableError}; +use crate::kernel::transaction::{CommitBuilder, CommitProperties, TableReference, PROTOCOL}; use crate::kernel::{Action, ActionType, Metadata, StructType, StructTypeExt}; use crate::logstore::LogStoreRef; use crate::operations::cast::merge_schema::merge_arrow_schema; diff --git a/crates/core/src/protocol/checkpoints.rs b/crates/core/src/protocol/checkpoints.rs index e7e3141cd7..34bd8ef5f3 100644 --- a/crates/core/src/protocol/checkpoints.rs +++ b/crates/core/src/protocol/checkpoints.rs @@ -602,8 +602,8 @@ mod tests { use serde_json::json; use super::*; + use crate::kernel::transaction::{CommitBuilder, TableReference}; use crate::kernel::StructType; - use crate::operations::transaction::{CommitBuilder, TableReference}; use crate::operations::DeltaOps; use crate::protocol::Metadata; use crate::writer::test_utils::get_delta_schema; diff --git a/crates/core/src/table/state.rs b/crates/core/src/table/state.rs index 2a25399c42..da2b76fd97 100644 --- a/crates/core/src/table/state.rs +++ b/crates/core/src/table/state.rs @@ -60,7 +60,7 @@ impl DeltaTableState { /// Construct a delta table state object from a list of actions #[cfg(test)] pub fn from_actions(actions: Vec) -> DeltaResult { - use crate::operations::transaction::CommitData; + use crate::kernel::transaction::CommitData; use crate::protocol::{DeltaOperation, SaveMode}; let metadata = actions diff --git a/crates/core/src/test_utils/factories/actions.rs b/crates/core/src/test_utils/factories/actions.rs index bd55b260e1..72da78d772 100644 --- a/crates/core/src/test_utils/factories/actions.rs +++ b/crates/core/src/test_utils/factories/actions.rs @@ -9,8 +9,8 @@ use object_store::ObjectMeta; use super::{get_parquet_bytes, DataFactory, FileStats}; use crate::kernel::arrow::extract::{self as ex}; use crate::kernel::partitions_schema; +use crate::kernel::transaction::PROTOCOL; use crate::kernel::{Add, Metadata, Protocol, Remove, StructType}; -use crate::operations::transaction::PROTOCOL; use delta_kernel::table_features::{ReaderFeature, WriterFeature}; pub struct ActionFactory; diff --git a/crates/core/src/writer/mod.rs b/crates/core/src/writer/mod.rs index cd87459c2f..169a72bdf8 100644 --- a/crates/core/src/writer/mod.rs +++ b/crates/core/src/writer/mod.rs @@ -7,8 +7,8 @@ use parquet::errors::ParquetError; use serde_json::Value; use crate::errors::DeltaTableError; +use crate::kernel::transaction::{CommitBuilder, CommitProperties}; use crate::kernel::{Action, Add}; -use crate::operations::transaction::{CommitBuilder, CommitProperties}; use crate::protocol::{ColumnCountStat, DeltaOperation, SaveMode}; use crate::DeltaTable; diff --git a/crates/core/tests/command_merge.rs b/crates/core/tests/command_merge.rs index 7b4c3aad01..c0afc200e2 100644 --- a/crates/core/tests/command_merge.rs +++ b/crates/core/tests/command_merge.rs @@ -7,9 +7,9 @@ use datafusion::dataframe::DataFrame; use datafusion::prelude::SessionContext; use datafusion_common::Column; use datafusion_expr::{col, lit, Expr}; +use deltalake_core::kernel::transaction::TransactionError; use deltalake_core::kernel::{DataType as DeltaDataType, PrimitiveType, StructField, StructType}; use deltalake_core::operations::merge::MergeMetrics; -use deltalake_core::operations::transaction::TransactionError; use deltalake_core::protocol::SaveMode; use deltalake_core::{open_table, DeltaOps, DeltaResult, DeltaTable, DeltaTableError}; use std::sync::Arc; diff --git a/crates/core/tests/command_optimize.rs b/crates/core/tests/command_optimize.rs index 4826647750..5cd143864a 100644 --- a/crates/core/tests/command_optimize.rs +++ b/crates/core/tests/command_optimize.rs @@ -5,11 +5,11 @@ use arrow_array::{Int32Array, RecordBatch, StringArray}; use arrow_schema::{DataType as ArrowDataType, Field, Schema as ArrowSchema}; use arrow_select::concat::concat_batches; use deltalake_core::errors::DeltaTableError; +use deltalake_core::kernel::transaction::{CommitBuilder, CommitProperties}; use deltalake_core::kernel::{Action, DataType, PrimitiveType, StructField}; use deltalake_core::operations::optimize::{ create_merge_plan, MetricDetails, Metrics, OptimizeType, }; -use deltalake_core::operations::transaction::{CommitBuilder, CommitProperties}; use deltalake_core::operations::DeltaOps; use deltalake_core::protocol::DeltaOperation; use deltalake_core::storage::ObjectStoreRef; diff --git a/crates/core/tests/commit_info_format.rs b/crates/core/tests/commit_info_format.rs index df817365b3..1ba00cc9d3 100644 --- a/crates/core/tests/commit_info_format.rs +++ b/crates/core/tests/commit_info_format.rs @@ -1,8 +1,8 @@ #![allow(dead_code)] mod fs_common; +use deltalake_core::kernel::transaction::CommitBuilder; use deltalake_core::kernel::Action; -use deltalake_core::operations::transaction::CommitBuilder; use deltalake_core::protocol::{DeltaOperation, SaveMode}; use serde_json::json; use std::error::Error; diff --git a/crates/core/tests/fs_common/mod.rs b/crates/core/tests/fs_common/mod.rs index 061c0d8c58..ebc7cc71bc 100644 --- a/crates/core/tests/fs_common/mod.rs +++ b/crates/core/tests/fs_common/mod.rs @@ -1,9 +1,9 @@ use chrono::Utc; +use deltalake_core::kernel::transaction::CommitBuilder; use deltalake_core::kernel::{ Action, Add, DataType, PrimitiveType, Remove, StructField, StructType, }; use deltalake_core::operations::create::CreateBuilder; -use deltalake_core::operations::transaction::CommitBuilder; use deltalake_core::protocol::{DeltaOperation, SaveMode}; use deltalake_core::storage::{GetResult, ObjectStoreResult, StorageOptions}; use deltalake_core::DeltaTable; diff --git a/crates/lakefs/src/client.rs b/crates/lakefs/src/client.rs index 9647d086c8..548ca44f37 100644 --- a/crates/lakefs/src/client.rs +++ b/crates/lakefs/src/client.rs @@ -1,5 +1,5 @@ use dashmap::DashMap; -use deltalake_core::operations::transaction::TransactionError; +use deltalake_core::kernel::transaction::TransactionError; use deltalake_core::DeltaResult; use reqwest::Client; use reqwest::StatusCode; diff --git a/crates/lakefs/src/errors.rs b/crates/lakefs/src/errors.rs index f48c9d173a..ff994f8c1a 100644 --- a/crates/lakefs/src/errors.rs +++ b/crates/lakefs/src/errors.rs @@ -1,6 +1,6 @@ //! Errors for LakeFS log store -use deltalake_core::operations::transaction::TransactionError; +use deltalake_core::kernel::transaction::TransactionError; use deltalake_core::DeltaTableError; use reqwest::Error; diff --git a/crates/lakefs/src/logstore.rs b/crates/lakefs/src/logstore.rs index 11be99387a..dc494d83db 100644 --- a/crates/lakefs/src/logstore.rs +++ b/crates/lakefs/src/logstore.rs @@ -11,12 +11,12 @@ use deltalake_core::storage::{ commit_uri_from_version, DefaultObjectStoreRegistry, ObjectStoreRegistry, }; use deltalake_core::storage::{url_prefix_handler, DeltaIOStorageBackend, IORuntime}; -use deltalake_core::{logstore::*, DeltaTableError, Path}; use deltalake_core::{ - operations::transaction::TransactionError, + kernel::transaction::TransactionError, storage::{ObjectStoreRef, StorageOptions}, DeltaResult, }; +use deltalake_core::{logstore::*, DeltaTableError, Path}; use object_store::{Attributes, Error as ObjectStoreError, ObjectStore, PutOptions, TagSet}; use tracing::debug; use url::Url; diff --git a/crates/test/src/concurrent.rs b/crates/test/src/concurrent.rs index aed4576925..e06e752270 100644 --- a/crates/test/src/concurrent.rs +++ b/crates/test/src/concurrent.rs @@ -3,8 +3,8 @@ use std::future::Future; use std::iter::FromIterator; use std::time::Duration; +use deltalake_core::kernel::transaction::CommitBuilder; use deltalake_core::kernel::{Action, Add, DataType, PrimitiveType, StructField, StructType}; -use deltalake_core::operations::transaction::CommitBuilder; use deltalake_core::operations::DeltaOps; use deltalake_core::protocol::{DeltaOperation, SaveMode}; use deltalake_core::{DeltaTable, DeltaTableBuilder}; diff --git a/crates/test/src/lib.rs b/crates/test/src/lib.rs index dd8c2a2951..6930f6a718 100644 --- a/crates/test/src/lib.rs +++ b/crates/test/src/lib.rs @@ -4,10 +4,10 @@ use std::collections::HashMap; use std::sync::Arc; use bytes::Bytes; +use deltalake_core::kernel::transaction::CommitBuilder; use deltalake_core::kernel::{Action, Add, Remove, StructType}; use deltalake_core::logstore::LogStore; use deltalake_core::operations::create::CreateBuilder; -use deltalake_core::operations::transaction::CommitBuilder; use deltalake_core::protocol::{DeltaOperation, SaveMode}; use deltalake_core::DeltaTable; use deltalake_core::DeltaTableBuilder; diff --git a/python/src/lib.rs b/python/src/lib.rs index bd2aba0fc7..6295cd3189 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -31,6 +31,7 @@ use deltalake::datafusion::catalog::TableProvider; use deltalake::datafusion::prelude::SessionContext; use deltalake::delta_datafusion::{DeltaCdfTableProvider, DeltaDataChecker}; use deltalake::errors::DeltaTableError; +use deltalake::kernel::transaction::{CommitBuilder, CommitProperties, TableReference, PROTOCOL}; use deltalake::kernel::{ scalars::ScalarExt, Action, Add, Invariant, LogicalFile, Remove, StructType, Transaction, }; @@ -47,9 +48,6 @@ use deltalake::operations::load_cdf::CdfLoadBuilder; use deltalake::operations::optimize::{OptimizeBuilder, OptimizeType}; use deltalake::operations::restore::RestoreBuilder; use deltalake::operations::set_tbl_properties::SetTablePropertiesBuilder; -use deltalake::operations::transaction::{ - CommitBuilder, CommitProperties, TableReference, PROTOCOL, -}; use deltalake::operations::update::UpdateBuilder; use deltalake::operations::vacuum::VacuumBuilder; use deltalake::operations::write::WriteBuilder;