From e67ce465c8e3cf65683a0ff83339c9ba96d85cf3 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Thu, 19 Dec 2024 14:27:04 +0100 Subject: [PATCH 1/3] docs: docs updates and fixes and flatter exports Signed-off-by: Robert Pack --- README.md | 37 ++++++-- acceptance/Cargo.toml | 7 -- doc/architecture.md | 115 ----------------------- doc/roadmap.md | 33 ------- kernel/src/engine/default/file_stream.rs | 4 +- kernel/src/engine/default/mod.rs | 5 +- kernel/src/expressions/column_names.rs | 2 +- kernel/src/lib.rs | 7 +- kernel/src/log_segment.rs | 16 ++-- kernel/src/scan/mod.rs | 19 ++-- kernel/src/scan/state.rs | 5 + kernel/src/snapshot.rs | 4 +- kernel/src/table_changes/mod.rs | 3 +- kernel/src/table_changes/scan.rs | 6 +- kernel/src/transaction.rs | 4 +- 15 files changed, 75 insertions(+), 192 deletions(-) delete mode 100644 doc/architecture.md delete mode 100644 doc/roadmap.md diff --git a/README.md b/README.md index 2f5565d8f..bfe8bf27b 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,29 @@ +
+  [delta-kernel-rs logo]
+
+  An implementation of the Delta protocol for use in native query engines.
+
+  [Rust docs] · [Report a bug] · [Request a feature]
+
+  [Crate badge] · [#delta-rs in the Delta Lake Slack workspace badge]
+
+ # delta-kernel-rs Delta-kernel-rs is an experimental [Delta][delta] implementation focused on interoperability with a @@ -12,11 +38,12 @@ is the Rust/C equivalent of [Java Delta Kernel][java-kernel]. Delta-kernel-rs is split into a few different crates: -- kernel: The actual core kernel crate -- acceptance: Acceptance tests that validate correctness via the [Delta Acceptance Tests][dat] -- derive-macros: A crate for our [derive-macros] to live in -- ffi: Functionallity that enables delta-kernel-rs to be used from `C` or `C++` See the [ffi](ffi) +- [kernel](kernel): The actual core kernel crate +- [acceptance](acceptance): Acceptance tests that validate correctness via the [Delta Acceptance Tests][dat] +- [derive-macros](derive-macros): A crate for our [derive-macros] to live in +- [ffi](ffi): Functionallity that enables delta-kernel-rs to be used from `C` or `C++` See the [ffi](ffi) directory for more information. +- [ffi-proc-macros](ffi-proc-macros): Procedural macros for the delta_kernel_ffi crate. ## Building By default we build only the `kernel` and `acceptance` crates, which will also build `derive-macros` @@ -111,7 +138,6 @@ and then checking what version of `object_store` it depends on. ## Documentation - [API Docs](https://docs.rs/delta_kernel/latest/delta_kernel/) -- [arcitecture.md](doc/architecture.md) document describing the kernel architecture (currently wip) ## Examples @@ -179,7 +205,6 @@ Some design principles which should be considered: [delta-github]: https://github.com/delta-io/delta [java-kernel]: https://github.com/delta-io/delta/tree/master/kernel [rustup]: https://rustup.rs -[architecture.md]: https://github.com/delta-io/delta-kernel-rs/tree/master/architecture.md [dat]: https://github.com/delta-incubator/dat [derive-macros]: https://doc.rust-lang.org/reference/procedural-macros.html [API Docs]: https://docs.rs/delta_kernel/latest/delta_kernel/ diff --git a/acceptance/Cargo.toml b/acceptance/Cargo.toml index ad4ccc008..10ff71782 100644 --- a/acceptance/Cargo.toml +++ b/acceptance/Cargo.toml @@ -36,14 +36,7 @@ tar = "0.4" [dev-dependencies] datatest-stable = "0.2" -test-log = { version = "0.2", default-features = false, features = ["trace"] } -tempfile = "3" -test-case = { version = "3.3.1" } tokio = { version = "1.40" } -tracing-subscriber = { version = "0.3", default-features = false, features = [ - "env-filter", - "fmt", -] } [[test]] name = "dat_reader" diff --git a/doc/architecture.md b/doc/architecture.md deleted file mode 100644 index a67ee08ab..000000000 --- a/doc/architecture.md +++ /dev/null @@ -1,115 +0,0 @@ -# Architecture - -`delta_kernel` crate architecture is still a work in progress! - -### Goals - -in order of priority (this is placeholder and we need to redo them): -1. simplicity and ease of use (probably too vague) -2. query engine agnostic -3. performance is explicitly secondary goal with the exception of operating in bounded memory - -## 10,000-foot view - -Two major API surface areas: - -1. Engine API -3. Table API - -Consider the usage pattern by example: if `delta-rs` wants to leverage -delta\_kernel to read tables, it first must take a dependency on delta\_kernel -and provide any of the traits it wishes (otherwise rely on defaults already -provided in delta\_kernel) - this is API (1) above. Then the engine code can -leverage the table API (2) in order to perform actual interaction with delta -tables. 
- -### Engine API - -The engine API aims to provide the least dependency surface as possible, this -means largely using traits to dictate what behavior should be implemented -"above" while placing as much "core" Delta Lake protocol implementation into -the `delta_kernel` crate as possible. - -- trait `ObjectStore` -- trait JsonReader - - arrow-json, simd-json, serde-json, etc -- trait ParquetReader - - Can DuckDB bring their own Parquer reader an in-memory format -- trait ExpressionEvaluator - - datafusion - - duckdb's - - predominantly used during data skipping, to scan for `where x < 5` based on file skipping -- struct DeltaLog/Segment/Snapshot/Replay - - generic over the above traits to allow consistent log interactions across table API implementations. - -```mermaid -classDiagram - class ObjectStore { - } - - %% DuckDB, Redshift, anything using Arrow C++, FFI? - class ParquetReader { - } - - %% arrow-json, simd-json, serde, FFI BYOJP - class JsonReader { - } - - class ExpressionEvaluator { - } -``` - -#### Engine Integrations - -Ideally there are some engines that would be able to support native Delta Lake integration on top of this API: - -* DuckDB - * Has their own parquet reader and in-memory representation -* ?? - -### Table API - -The Table API provides a little bit more opinions for handling some Delta Lake -protocol nuance and should be incorporating more deppendencies to provide -`delta_kernel` users with a simpler path to building applications on top of the -Delta Lake protocol.. - -#### Arrow - -Sane defaults for the above traits with `RecordBatch` as the mode of interop between everything. -This feature flag turns on the most sane default, parquet, json, some expression evaluator -with arrow as its in-memory format. - - -```mermaid -classDiagram - DeltaTable --> Snapshot : yields Snapshots - Snapshot --> Scan - Scan --> ScanFileIterator - ScanFileIterator --> ScanDataIterator - - class StorageClient { - } - - note for DeltaTable "Responsible for log storage" - class DeltaTable { - -Url location - get_latest_snapshot(StorageClient) Snapshot - get_latest_version(StorageClient) uint64 - } - - note for Snapshot "Responsible for log storage" - class Snapshot { - +uint64 version - } - - class Scan { - +object projection - +object predicate - } - - class StorageClient { - } -``` - - diff --git a/doc/roadmap.md b/doc/roadmap.md deleted file mode 100644 index 420027015..000000000 --- a/doc/roadmap.md +++ /dev/null @@ -1,33 +0,0 @@ -# Roadmap - -This document represents a draft of a potential roadmap for delta-kernel-rs - -The whole premise of delta-kernel is to address gaps identified in the -proliferation of APIs and custom implementations of the Delta protocol. This -should learn from those and provide a simpler or easier way for _all_ those -integrations to make following Delta Lake easier. - - -## Data and AI Summit - -Goals: - -* Get a topic branch in `delta-rs` functional with a minimal use-case, - replacing some key functionality. - * (?) Perhaps a CDC or Deletion vector use-case, to showcase the benefit. - Denny is more interested in psuedo-code level -* Defined feature strategy for library vs user API surfaces - * Singular user API implemented, perhaps arrow2 to demonstrate the - hot-swappability versus strict delta-rs -* Basic scaffolding of traits defined with issues created in repository for - contributors to participate -* Strong CI pipeline for getting new contributors on-board. 
-* **EVERYTHING** that exists gratuitously documented in rustdoc with doc tests -* Show how using delta-kernel-rs would be simpler for an API implementor - * Selling this idea/API to future delta integrations - -Demo ideas (order of priority): - -* wasm in the browser for delta integration. -* Bootstrap simple Ruby client -* Bootstrap simple Node client diff --git a/kernel/src/engine/default/file_stream.rs b/kernel/src/engine/default/file_stream.rs index 075716a75..d4933ba6a 100644 --- a/kernel/src/engine/default/file_stream.rs +++ b/kernel/src/engine/default/file_stream.rs @@ -16,14 +16,14 @@ use crate::engine::arrow_data::ArrowEngineData; use crate::{DeltaResult, FileDataReadResultIterator, FileMeta}; /// A fallible future that resolves to a stream of [`RecordBatch`] -/// cbindgen:ignore +// cbindgen:ignore pub type FileOpenFuture = BoxFuture<'static, DeltaResult>>>; /// Generic API for opening a file using an [`ObjectStore`] and resolving to a /// stream of [`RecordBatch`] /// -/// [`ObjectStore`]: object_store::ObjectStore +/// [ObjectStore]: object_store::ObjectStore pub trait FileOpener: Send + Unpin { /// Asynchronously open the specified file and return a stream /// of [`RecordBatch`] diff --git a/kernel/src/engine/default/mod.rs b/kernel/src/engine/default/mod.rs index d89cf29cd..12675030b 100644 --- a/kernel/src/engine/default/mod.rs +++ b/kernel/src/engine/default/mod.rs @@ -13,7 +13,6 @@ use self::storage::parse_url_opts; use object_store::{path::Path, DynObjectStore}; use url::Url; -use self::executor::TaskExecutor; use self::filesystem::ObjectStoreFileSystemClient; use self::json::DefaultJsonHandler; use self::parquet::DefaultParquetHandler; @@ -33,6 +32,10 @@ pub mod json; pub mod parquet; pub mod storage; +#[cfg(feature = "tokio")] +pub use executor::tokio::*; +pub use executor::*; + #[derive(Debug)] pub struct DefaultEngine { store: Arc, diff --git a/kernel/src/expressions/column_names.rs b/kernel/src/expressions/column_names.rs index 0ea7a7067..7a2e959ac 100644 --- a/kernel/src/expressions/column_names.rs +++ b/kernel/src/expressions/column_names.rs @@ -183,7 +183,7 @@ impl Hash for ColumnName { /// assert_eq!(colname, parsed); /// ``` /// -/// [`FromStr`]: std::str::FromStr +/// [FromStr]: std::str::FromStr impl Display for ColumnName { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { for (i, s) in self.iter().enumerate() { diff --git a/kernel/src/lib.rs b/kernel/src/lib.rs index 1d6902d86..82f281d67 100644 --- a/kernel/src/lib.rs +++ b/kernel/src/lib.rs @@ -43,7 +43,7 @@ //! //! Delta Kernel needs to perform some basic operations against file systems like listing and //! reading files. These interactions are encapsulated in the [`FileSystemClient`] trait. -//! Implementors must take care that all assumptions on the behavior if the functions - like sorted +//! Implementors must take care that all assumptions on the behavior of the functions - like sorted //! results - are respected. //! //! ## Reading log and data files @@ -103,6 +103,7 @@ pub use delta_kernel_derive; pub use engine_data::{EngineData, RowVisitor}; pub use error::{DeltaResult, Error}; pub use expressions::{Expression, ExpressionRef}; +pub use snapshot::Snapshot; pub use table::Table; #[cfg(any( @@ -329,8 +330,8 @@ pub trait ExpressionHandler: AsAny { /// - `expression`: Expression to evaluate. /// - `output_type`: Expected result data type. 
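+///
+/// # Example
+///
+/// An illustrative sketch, assuming `handler` implements [`ExpressionHandler`] and `batch`
+/// is [`EngineData`] whose schema matches `schema` (all three names are hypothetical):
+///
+/// ```ignore
+/// let evaluator = handler.get_evaluator(schema, expression, DataType::LONG);
+/// let result = evaluator.evaluate(batch.as_ref())?;
+/// ```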
/// - /// [`Schema`]: crate::schema::StructType - /// [`DataType`]: crate::schema::DataType + /// [Schema]: crate::schema::StructType + /// [DataType]: crate::schema::DataType fn get_evaluator( &self, schema: SchemaRef, diff --git a/kernel/src/log_segment.rs b/kernel/src/log_segment.rs index 24d78a986..d7e9ad693 100644 --- a/kernel/src/log_segment.rs +++ b/kernel/src/log_segment.rs @@ -31,7 +31,7 @@ mod tests; /// [`LogSegment`] is used in [`Snapshot`] when built with [`LogSegment::for_snapshot`], and /// and in `TableChanges` when built with [`LogSegment::for_table_changes`]. /// -/// [`Snapshot`]: crate::snapshot::Snapshot +/// [Snapshot]: crate::snapshot::Snapshot #[derive(Debug)] #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) struct LogSegment { @@ -107,7 +107,7 @@ impl LogSegment { /// - `checkpoint_hint`: a `CheckpointMetadata` to start the log segment from (e.g. from reading the `last_checkpoint` file). /// - `time_travel_version`: The version of the log that the Snapshot will be at. /// - /// [`Snapshot`]: crate::snapshot::Snapshot + /// [Snapshot]: crate::snapshot::Snapshot #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) fn for_snapshot( fs_client: &dyn FileSystemClient, @@ -187,11 +187,15 @@ impl LogSegment { /// The boolean flags indicates whether the data was read from /// a commit file (true) or a checkpoint file (false). /// - /// `read_schema` is the schema to read the log files with. This can be used - /// to project the log files to a subset of the columns. + /// # Arguments /// - /// `meta_predicate` is an optional expression to filter the log files with. It is _NOT_ the - /// query's predicate, but rather a predicate for filtering log files themselves. + /// - `engine` is the engine to use to read and process the log files. + /// - `commit_read_schema` is the schema to read the commit files with. This can be used + /// to project the log files to a subset of the columns. + /// - `checkpoint_read_schema` is the schema to read the checkpoint files with. This can be used + /// to project the log files to a subset of the columns. + /// - `meta_predicate` is an optional expression to filter the log files with. It is _NOT_ the + /// query's predicate, but rather a predicate for filtering log files themselves. #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) fn replay( &self, diff --git a/kernel/src/scan/mod.rs b/kernel/src/scan/mod.rs index e0d345b56..1044d267e 100644 --- a/kernel/src/scan/mod.rs +++ b/kernel/src/scan/mod.rs @@ -8,6 +8,7 @@ use itertools::Itertools; use tracing::debug; use url::Url; +use self::log_replay::scan_action_iter; use crate::actions::deletion_vector::{ deletion_treemap_to_bools, split_vector, DeletionVectorDescriptor, }; @@ -16,7 +17,6 @@ use crate::expressions::{ColumnName, Expression, ExpressionRef, ExpressionTransf use crate::predicates::parquet_stats_skipping::{ ParquetStatsProvider, ParquetStatsSkippingFilter as _, }; -use crate::scan::state::{DvInfo, Stats}; use crate::schema::{ ArrayType, DataType, MapType, PrimitiveType, Schema, SchemaRef, SchemaTransform, StructField, StructType, @@ -25,13 +25,12 @@ use crate::snapshot::Snapshot; use crate::table_features::ColumnMappingMode; use crate::{DeltaResult, Engine, EngineData, Error, FileMeta}; -use self::log_replay::scan_action_iter; -use self::state::GlobalScanState; - pub(crate) mod data_skipping; pub mod log_replay; pub mod state; +pub use state::*; + /// Builder to scan a snapshot of a table. 
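+///
+/// # Example
+///
+/// A minimal sketch, assuming `snapshot` is an `Arc<Snapshot>` and `engine` is an
+/// [`Engine`] handle (both hypothetical here):
+///
+/// ```ignore
+/// let scan = ScanBuilder::new(snapshot)
+///     .with_schema(read_schema)
+///     .build()?;
+/// let results = scan.execute(engine)?;
+/// ```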
pub struct ScanBuilder { snapshot: Arc, @@ -63,8 +62,8 @@ impl ScanBuilder { /// A table with columns `[a, b, c]` could have a scan which reads only the first /// two columns by using the schema `[a, b]`. /// - /// [`Schema`]: crate::schema::Schema - /// [`Snapshot`]: crate::snapshot::Snapshot + /// [Schema]: crate::schema::Schema + /// [Snapshot]: crate::snapshot::Snapshot pub fn with_schema(mut self, schema: SchemaRef) -> Self { self.schema = Some(schema); self @@ -273,7 +272,7 @@ pub struct ScanResult { /// that this data may include data that should be filtered out based on the mask given by /// [`full_mask`]. /// - /// [`full_mask`]: #method.full_mask + /// [full_mask]: #method.full_mask pub raw_data: DeltaResult>, /// Raw row mask. // TODO(nick) this should be allocated by the engine @@ -289,7 +288,7 @@ impl ScanResult { /// to extend the mask to the full length of the batch or arrow will drop the extra /// rows. Calling [`full_mask`] instead avoids this risk entirely, at the cost of a copy. /// - /// [`full_mask`]: #method.full_mask + /// [full_mask]: #method.full_mask pub fn raw_mask(&self) -> Option<&Vec> { self.raw_mask.as_ref() } @@ -345,7 +344,7 @@ impl std::fmt::Debug for Scan { impl Scan { /// Get a shared reference to the [`Schema`] of the scan. /// - /// [`Schema`]: crate::schema::Schema + /// [Schema]: crate::schema::Schema pub fn schema(&self) -> &SchemaRef { &self.logical_schema } @@ -466,7 +465,7 @@ impl Scan { .map(|res| { let (data, vec) = res?; let scan_files = vec![]; - state::visit_scan_files(data.as_ref(), &vec, scan_files, scan_data_callback) + visit_scan_files(data.as_ref(), &vec, scan_files, scan_data_callback) }) // Iterator>> to Iterator> .flatten_ok(); diff --git a/kernel/src/scan/state.rs b/kernel/src/scan/state.rs index b57f0c120..383fc2f89 100644 --- a/kernel/src/scan/state.rs +++ b/kernel/src/scan/state.rs @@ -21,10 +21,15 @@ use super::log_replay::SCAN_ROW_SCHEMA; /// State that doesn't change between scans #[derive(Clone, Debug, Serialize, Deserialize)] pub struct GlobalScanState { + /// Storage location where the table is stored as a URL pub table_root: String, + /// Columns this table is partitioned by pub partition_columns: Vec, + /// Logical schema of the table including computed and/or mapped columns pub logical_schema: SchemaRef, + /// Physical schema of the table as it is stored on disk pub physical_schema: SchemaRef, + /// Column mapping mode for this table pub column_mapping_mode: ColumnMappingMode, } diff --git a/kernel/src/snapshot.rs b/kernel/src/snapshot.rs index 75f52ab78..11f40843d 100644 --- a/kernel/src/snapshot.rs +++ b/kernel/src/snapshot.rs @@ -182,8 +182,8 @@ struct CheckpointMetadata { /// the read. Thus, the semantics of this function are to return `None` if the file is not found or /// is invalid JSON. Unexpected/unrecoverable errors are returned as `Err` case and are assumed to /// cause failure. -/// -/// TODO: java kernel retries three times before failing, should we do the same? +// +// TODO: java kernel retries three times before failing, should we do the same? fn read_last_checkpoint( fs_client: &dyn FileSystemClient, log_root: &Url, diff --git a/kernel/src/table_changes/mod.rs b/kernel/src/table_changes/mod.rs index b74f65b7a..a777cb52b 100644 --- a/kernel/src/table_changes/mod.rs +++ b/kernel/src/table_changes/mod.rs @@ -75,7 +75,7 @@ static CDF_FIELDS: LazyLock<[StructField; 3]> = LazyLock::new(|| { /// file modification time of the log file. No timezone is associated with the timestamp. 
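+///
+/// For illustration: a row rewritten by an `UPDATE` surfaces twice in the change feed, once
+/// with `_change_type = "update_preimage"` and once with `"update_postimage"`, both carrying
+/// the `_commit_version` and `_commit_timestamp` of the commit that changed them.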
/// /// Currently, in-commit timestamps (ICT) is not supported. In the future when ICT is enabled, the -/// timestamp will be retrieved from the `inCommitTimestamp` field of the CommitInfo` action. +/// timestamp will be retrieved from the `inCommitTimestamp` field of the [`CommitInfo`] action. /// See issue [#559](https://github.com/delta-io/delta-kernel-rs/issues/559) /// For details on In-Commit Timestamps, see the [Protocol](https://github.com/delta-io/delta/blob/master/PROTOCOL.md#in-commit-timestamps). /// @@ -93,6 +93,7 @@ static CDF_FIELDS: LazyLock<[StructField; 3]> = LazyLock::new(|| { /// future to allow compatible schemas that are not the exact same. /// See issue [#523](https://github.com/delta-io/delta-kernel-rs/issues/523) /// +/// [CommitInfo]: crate::actions::CommitInfo /// # Examples /// Get `TableChanges` for versions 0 to 1 (inclusive) /// ```rust diff --git a/kernel/src/table_changes/scan.rs b/kernel/src/table_changes/scan.rs index 9b0ba3067..9e8a769e8 100644 --- a/kernel/src/table_changes/scan.rs +++ b/kernel/src/table_changes/scan.rs @@ -46,7 +46,7 @@ pub struct TableChangesScan { /// Note: There is a lot of shared functionality between [`TableChangesScanBuilder`] and /// [`ScanBuilder`]. /// -/// [`ScanBuilder`]: crate::scan::ScanBuilder +/// [ScanBuilder]: crate::scan::ScanBuilder /// # Example /// Construct a [`TableChangesScan`] from `table_changes` with a given schema and predicate /// ```rust @@ -91,7 +91,7 @@ impl TableChangesScanBuilder { /// A table with columns `[a, b, c]` could have a scan which reads only the first /// two columns by using the schema `[a, b]`. /// - /// [`Schema`]: crate::schema::Schema + /// [Schema]: crate::schema::Schema pub fn with_schema(mut self, schema: impl Into>) -> Self { self.schema = schema.into(); self @@ -217,7 +217,7 @@ impl TableChangesScan { /// Get a shared reference to the [`Schema`] of the table changes scan. /// - /// [`Schema`]: crate::schema::Schema + /// [Schema]: crate::schema::Schema pub fn schema(&self) -> &SchemaRef { &self.logical_schema } diff --git a/kernel/src/transaction.rs b/kernel/src/transaction.rs index c6e93ea7b..40524442b 100644 --- a/kernel/src/transaction.rs +++ b/kernel/src/transaction.rs @@ -31,7 +31,7 @@ pub(crate) static WRITE_METADATA_SCHEMA: LazyLock = LazyLock::new(|| /// Get the expected schema for engine data passed to [`add_write_metadata`]. /// -/// [`add_write_metadata`]: crate::transaction::Transaction::add_write_metadata +/// [add_write_metadata]: crate::transaction::Transaction::add_write_metadata pub fn get_write_metadata_schema() -> &'static SchemaRef { &WRITE_METADATA_SCHEMA } @@ -209,7 +209,7 @@ fn generate_adds<'a>( /// WriteContext is data derived from a [`Transaction`] that can be provided to writers in order to /// write table data. 
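+///
+/// # Example
+///
+/// A sketch of the intended flow; `txn` is a hypothetical [`Transaction`], and the accessor
+/// name is an assumption for illustration, not a confirmed API:
+///
+/// ```ignore
+/// let write_context = txn.get_write_context();
+/// // Write parquet data under the context's target directory, then report the
+/// // written files back to the transaction via `add_write_metadata`.
+/// ```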
/// -/// [`Transaction`]: struct.Transaction.html +/// [Transaction]: struct.Transaction.html pub struct WriteContext { target_dir: Url, schema: SchemaRef, From 196045d4d9282f3858e9b6cca15afda63ad23ab1 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Thu, 19 Dec 2024 18:22:40 +0100 Subject: [PATCH 2/3] fix: cbindgen comment Signed-off-by: Robert Pack --- kernel/src/engine/default/file_stream.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/src/engine/default/file_stream.rs b/kernel/src/engine/default/file_stream.rs index d4933ba6a..e0bb39b79 100644 --- a/kernel/src/engine/default/file_stream.rs +++ b/kernel/src/engine/default/file_stream.rs @@ -16,7 +16,7 @@ use crate::engine::arrow_data::ArrowEngineData; use crate::{DeltaResult, FileDataReadResultIterator, FileMeta}; /// A fallible future that resolves to a stream of [`RecordBatch`] -// cbindgen:ignore +/// cbindgen:ignore pub type FileOpenFuture = BoxFuture<'static, DeltaResult>>>; From b125335a9fb4ac1a99ae891f41f1884d960500b1 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Fri, 20 Dec 2024 10:57:57 +0100 Subject: [PATCH 3/3] fix: revert link backticks Signed-off-by: Robert Pack --- kernel/src/engine/default/file_stream.rs | 2 +- kernel/src/expressions/column_names.rs | 2 +- kernel/src/lib.rs | 4 ++-- kernel/src/log_segment.rs | 4 ++-- kernel/src/scan/mod.rs | 4 ++-- kernel/src/table_changes/mod.rs | 2 +- kernel/src/table_changes/scan.rs | 6 +++--- kernel/src/transaction.rs | 2 +- 8 files changed, 13 insertions(+), 13 deletions(-) diff --git a/kernel/src/engine/default/file_stream.rs b/kernel/src/engine/default/file_stream.rs index e0bb39b79..075716a75 100644 --- a/kernel/src/engine/default/file_stream.rs +++ b/kernel/src/engine/default/file_stream.rs @@ -23,7 +23,7 @@ pub type FileOpenFuture = /// Generic API for opening a file using an [`ObjectStore`] and resolving to a /// stream of [`RecordBatch`] /// -/// [ObjectStore]: object_store::ObjectStore +/// [`ObjectStore`]: object_store::ObjectStore pub trait FileOpener: Send + Unpin { /// Asynchronously open the specified file and return a stream /// of [`RecordBatch`] diff --git a/kernel/src/expressions/column_names.rs b/kernel/src/expressions/column_names.rs index 7a2e959ac..0ea7a7067 100644 --- a/kernel/src/expressions/column_names.rs +++ b/kernel/src/expressions/column_names.rs @@ -183,7 +183,7 @@ impl Hash for ColumnName { /// assert_eq!(colname, parsed); /// ``` /// -/// [FromStr]: std::str::FromStr +/// [`FromStr`]: std::str::FromStr impl Display for ColumnName { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { for (i, s) in self.iter().enumerate() { diff --git a/kernel/src/lib.rs b/kernel/src/lib.rs index 82f281d67..17461ec22 100644 --- a/kernel/src/lib.rs +++ b/kernel/src/lib.rs @@ -330,8 +330,8 @@ pub trait ExpressionHandler: AsAny { /// - `expression`: Expression to evaluate. /// - `output_type`: Expected result data type. /// - /// [Schema]: crate::schema::StructType - /// [DataType]: crate::schema::DataType + /// [`Schema`]: crate::schema::StructType + /// [`DataType`]: crate::schema::DataType fn get_evaluator( &self, schema: SchemaRef, diff --git a/kernel/src/log_segment.rs b/kernel/src/log_segment.rs index d7e9ad693..399216904 100644 --- a/kernel/src/log_segment.rs +++ b/kernel/src/log_segment.rs @@ -31,7 +31,7 @@ mod tests; /// [`LogSegment`] is used in [`Snapshot`] when built with [`LogSegment::for_snapshot`], and /// and in `TableChanges` when built with [`LogSegment::for_table_changes`]. 
/// -/// [Snapshot]: crate::snapshot::Snapshot +/// [`Snapshot`]: crate::snapshot::Snapshot #[derive(Debug)] #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) struct LogSegment { @@ -107,7 +107,7 @@ impl LogSegment { /// - `checkpoint_hint`: a `CheckpointMetadata` to start the log segment from (e.g. from reading the `last_checkpoint` file). /// - `time_travel_version`: The version of the log that the Snapshot will be at. /// - /// [Snapshot]: crate::snapshot::Snapshot + /// [`Snapshot`]: crate::snapshot::Snapshot #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) fn for_snapshot( fs_client: &dyn FileSystemClient, diff --git a/kernel/src/scan/mod.rs b/kernel/src/scan/mod.rs index 1044d267e..676b7d84d 100644 --- a/kernel/src/scan/mod.rs +++ b/kernel/src/scan/mod.rs @@ -272,7 +272,7 @@ pub struct ScanResult { /// that this data may include data that should be filtered out based on the mask given by /// [`full_mask`]. /// - /// [full_mask]: #method.full_mask + /// [`full_mask`]: #method.full_mask pub raw_data: DeltaResult>, /// Raw row mask. // TODO(nick) this should be allocated by the engine @@ -288,7 +288,7 @@ impl ScanResult { /// to extend the mask to the full length of the batch or arrow will drop the extra /// rows. Calling [`full_mask`] instead avoids this risk entirely, at the cost of a copy. /// - /// [full_mask]: #method.full_mask + /// [`full_mask`]: #method.full_mask pub fn raw_mask(&self) -> Option<&Vec> { self.raw_mask.as_ref() } diff --git a/kernel/src/table_changes/mod.rs b/kernel/src/table_changes/mod.rs index a777cb52b..bf360e2e0 100644 --- a/kernel/src/table_changes/mod.rs +++ b/kernel/src/table_changes/mod.rs @@ -93,7 +93,7 @@ static CDF_FIELDS: LazyLock<[StructField; 3]> = LazyLock::new(|| { /// future to allow compatible schemas that are not the exact same. /// See issue [#523](https://github.com/delta-io/delta-kernel-rs/issues/523) /// -/// [CommitInfo]: crate::actions::CommitInfo +/// [`CommitInfo`]: crate::actions::CommitInfo /// # Examples /// Get `TableChanges` for versions 0 to 1 (inclusive) /// ```rust diff --git a/kernel/src/table_changes/scan.rs b/kernel/src/table_changes/scan.rs index 9e8a769e8..9b0ba3067 100644 --- a/kernel/src/table_changes/scan.rs +++ b/kernel/src/table_changes/scan.rs @@ -46,7 +46,7 @@ pub struct TableChangesScan { /// Note: There is a lot of shared functionality between [`TableChangesScanBuilder`] and /// [`ScanBuilder`]. /// -/// [ScanBuilder]: crate::scan::ScanBuilder +/// [`ScanBuilder`]: crate::scan::ScanBuilder /// # Example /// Construct a [`TableChangesScan`] from `table_changes` with a given schema and predicate /// ```rust @@ -91,7 +91,7 @@ impl TableChangesScanBuilder { /// A table with columns `[a, b, c]` could have a scan which reads only the first /// two columns by using the schema `[a, b]`. /// - /// [Schema]: crate::schema::Schema + /// [`Schema`]: crate::schema::Schema pub fn with_schema(mut self, schema: impl Into>) -> Self { self.schema = schema.into(); self @@ -217,7 +217,7 @@ impl TableChangesScan { /// Get a shared reference to the [`Schema`] of the table changes scan. 
/// - /// [Schema]: crate::schema::Schema + /// [`Schema`]: crate::schema::Schema pub fn schema(&self) -> &SchemaRef { &self.logical_schema } diff --git a/kernel/src/transaction.rs b/kernel/src/transaction.rs index 40524442b..f30d8cff0 100644 --- a/kernel/src/transaction.rs +++ b/kernel/src/transaction.rs @@ -31,7 +31,7 @@ pub(crate) static WRITE_METADATA_SCHEMA: LazyLock = LazyLock::new(|| /// Get the expected schema for engine data passed to [`add_write_metadata`]. /// -/// [add_write_metadata]: crate::transaction::Transaction::add_write_metadata +/// [`add_write_metadata`]: crate::transaction::Transaction::add_write_metadata pub fn get_write_metadata_schema() -> &'static SchemaRef { &WRITE_METADATA_SCHEMA }
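
The "flatter exports" in the first commit mean a few items now resolve from shorter paths. A sketch of what this series exposes to consumers, based only on the `pub use` lines added above:

```rust
use delta_kernel::Snapshot;              // re-exported from delta_kernel::snapshot
use delta_kernel::scan::GlobalScanState; // scan::state::* is re-exported from scan

// With the `tokio` feature, executor types are also re-exported
// from the default engine module via `pub use executor::tokio::*`.
```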