From f2267e672ea50668e095349617b82af0698dabb3 Mon Sep 17 00:00:00 2001 From: Jorge Hermo Date: Mon, 30 Dec 2024 13:27:09 +0100 Subject: [PATCH 01/23] feat: initial OperatorRegistry implementation --- core/src/types/operator/mod.rs | 7 ++++ core/src/types/operator/registry.rs | 50 +++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 core/src/types/operator/registry.rs diff --git a/core/src/types/operator/mod.rs b/core/src/types/operator/mod.rs index 879f113008ce..5c7d1f48f1c2 100644 --- a/core/src/types/operator/mod.rs +++ b/core/src/types/operator/mod.rs @@ -32,3 +32,10 @@ pub use metadata::OperatorInfo; pub mod operator_functions; pub mod operator_futures; + +// TODO: should we make the registry module public or export the OperatorFactory and OperatorRegistry +// types directly? + +mod registry; +pub use registry::OperatorFactory; +pub use registry::OperatorRegistry; diff --git a/core/src/types/operator/registry.rs b/core/src/types/operator/registry.rs new file mode 100644 index 000000000000..2113bdf39214 --- /dev/null +++ b/core/src/types/operator/registry.rs @@ -0,0 +1,50 @@ +use std::collections::HashMap; + +use crate::*; + +pub type OperatorFactory = fn(String, HashMap) -> Result; + +// TODO: create an static registry? or a global() method of OperatorRegistry that lazily initializes the registry? +// Register only services in `Scheme::enabled()` + +pub struct OperatorRegistry { + // TODO: add Arc> to make it cheap to clone + thread safe? or is it not needed? + registry: HashMap, +} + +impl OperatorRegistry { + pub fn register(&mut self, scheme: &str, factory: OperatorFactory) { + // TODO: should we receive a `&str` or a `String`? we are cloning it anyway + self.registry.insert(scheme.to_string(), factory); + } + pub fn parse( + &self, + uri: &str, + options: impl IntoIterator, + ) -> Result { + let uri = http::Uri::try_from(uri).map_err(|err| { + Error::new(ErrorKind::ConfigInvalid, "uri is invalid") + .with_context("uri", uri) + .set_source(err) + })?; + + let scheme = uri.scheme().ok_or_else(|| { + Error::new(ErrorKind::ConfigInvalid, "uri is missing scheme").with_context("uri", uri) + })?; + + let factory = self.registry.get(scheme).ok_or_else(|| { + Error::new( + ErrorKind::ConfigInvalid, + "could not find any operator factory for the given scheme", + ) + .with_context("uri", uri) + .with_context("scheme", scheme) + })?; + + // TODO: `OperatorFactory` should receive `IntoIterator` instead of `HashMap`? + let options = options.into_iter().collect(); + + // TODO: `OperatorFactory` should use `&str` instead of `String`? we are cloning it anyway + factory(uri.path().to_string(), options) + } +} From f581316bfbecface3835e3be892e5586af211718 Mon Sep 17 00:00:00 2001 From: Jorge Hermo Date: Mon, 30 Dec 2024 14:03:35 +0100 Subject: [PATCH 02/23] feat: continue with operator registry implementation --- core/src/types/builder.rs | 5 ++++ core/src/types/operator/registry.rs | 42 +++++++++++++++++++++++++---- 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/core/src/types/builder.rs b/core/src/types/builder.rs index 43b05e582dc2..bda8fd92ab96 100644 --- a/core/src/types/builder.rs +++ b/core/src/types/builder.rs @@ -137,6 +137,11 @@ pub trait Configurator: Serialize + DeserializeOwned + Debug + 'static { }) } + // TODO: document this. + fn from_uri(uri: &str, options: impl IntoIterator) -> Result { + todo!() + } + /// Convert this configuration into a service builder. fn into_builder(self) -> Self::Builder; } diff --git a/core/src/types/operator/registry.rs b/core/src/types/operator/registry.rs index 2113bdf39214..9586238eb099 100644 --- a/core/src/types/operator/registry.rs +++ b/core/src/types/operator/registry.rs @@ -1,8 +1,10 @@ use std::collections::HashMap; +use crate::services::Http; use crate::*; -pub type OperatorFactory = fn(String, HashMap) -> Result; +// In order to reduce boilerplate, we should return in this function a `Builder` instead of operator?. +pub type OperatorFactory = fn(&str, HashMap) -> Result; // TODO: create an static registry? or a global() method of OperatorRegistry that lazily initializes the registry? // Register only services in `Scheme::enabled()` @@ -13,6 +15,12 @@ pub struct OperatorRegistry { } impl OperatorRegistry { + pub fn new() -> Self { + Self { + registry: HashMap::new(), + } + } + pub fn register(&mut self, scheme: &str, factory: OperatorFactory) { // TODO: should we receive a `&str` or a `String`? we are cloning it anyway self.registry.insert(scheme.to_string(), factory); @@ -22,17 +30,17 @@ impl OperatorRegistry { uri: &str, options: impl IntoIterator, ) -> Result { - let uri = http::Uri::try_from(uri).map_err(|err| { + let parsed_uri = http::Uri::try_from(uri).map_err(|err| { Error::new(ErrorKind::ConfigInvalid, "uri is invalid") .with_context("uri", uri) .set_source(err) })?; - let scheme = uri.scheme().ok_or_else(|| { + let scheme = parsed_uri.scheme().ok_or_else(|| { Error::new(ErrorKind::ConfigInvalid, "uri is missing scheme").with_context("uri", uri) })?; - let factory = self.registry.get(scheme).ok_or_else(|| { + let factory = self.registry.get(scheme.as_str()).ok_or_else(|| { Error::new( ErrorKind::ConfigInvalid, "could not find any operator factory for the given scheme", @@ -45,6 +53,30 @@ impl OperatorRegistry { let options = options.into_iter().collect(); // TODO: `OperatorFactory` should use `&str` instead of `String`? we are cloning it anyway - factory(uri.path().to_string(), options) + factory(uri, options) + } + + pub fn global() -> Self { + let mut registry = Self::new(); + // TODO: have a `Builder::enabled()` method that returns the set of enabled services builders? + // Similar to `Scheme::Enabled()` + // or have an `Scheme::associated_builder` that given a scheme returns the associated builder? + + registry.register_builder::(); + registry + } + + fn register_builder(&mut self) { + self.register( + B::SCHEME.into_static(), + operator_factory_from_configurator::(), + ); + } +} + +fn operator_factory_from_configurator() -> OperatorFactory { + |uri, options| { + let builder = C::from_uri(uri, options)?.into_builder(); + Operator::new(builder).map(OperatorBuilder::finish) } } From b67be3899db8ce15e57bd5f36d1c776abab19b09 Mon Sep 17 00:00:00 2001 From: Jorge Hermo Date: Mon, 30 Dec 2024 14:30:06 +0100 Subject: [PATCH 03/23] feat: register enabled services & set global registry --- core/src/services/mod.rs | 5 + core/src/types/operator/registry.rs | 137 +++++++++++++++++++++++++++- core/src/types/scheme.rs | 2 + 3 files changed, 139 insertions(+), 5 deletions(-) diff --git a/core/src/services/mod.rs b/core/src/services/mod.rs index 0437dff4a759..06456caf8d58 100644 --- a/core/src/services/mod.rs +++ b/core/src/services/mod.rs @@ -19,13 +19,18 @@ //! //! More ongoing services support is tracked at [opendal#5](https://github.com/apache/opendal/issues/5). Please feel free to submit issues if there are services not covered. +use crate::Builder; +use std::collections::HashSet; + mod aliyun_drive; + pub use aliyun_drive::*; mod alluxio; pub use alluxio::*; mod atomicserver; + pub use self::atomicserver::*; mod azblob; diff --git a/core/src/types/operator/registry.rs b/core/src/types/operator/registry.rs index 9586238eb099..f24488ed12a9 100644 --- a/core/src/types/operator/registry.rs +++ b/core/src/types/operator/registry.rs @@ -1,8 +1,15 @@ +use std::cell::LazyCell; use std::collections::HashMap; -use crate::services::Http; +use crate::services::*; use crate::*; +// TODO: thread local or use LazyLock instead? +thread_local! { +pub static GLOBAL_REGISTRY: LazyCell = + LazyCell::new(|| OperatorRegistry::with_enabled_services()); +} + // In order to reduce boilerplate, we should return in this function a `Builder` instead of operator?. pub type OperatorFactory = fn(&str, HashMap) -> Result; @@ -25,6 +32,7 @@ impl OperatorRegistry { // TODO: should we receive a `&str` or a `String`? we are cloning it anyway self.registry.insert(scheme.to_string(), factory); } + pub fn parse( &self, uri: &str, @@ -56,13 +64,132 @@ impl OperatorRegistry { factory(uri, options) } - pub fn global() -> Self { + pub fn with_enabled_services() -> Self { let mut registry = Self::new(); - // TODO: have a `Builder::enabled()` method that returns the set of enabled services builders? + // TODO: is this correct? have a `Builder::enabled()` method that returns the set of enabled services builders? // Similar to `Scheme::Enabled()` // or have an `Scheme::associated_builder` that given a scheme returns the associated builder? - + #[cfg(feature = "services-aliyun-drive")] + registry.register_builder::(); + #[cfg(feature = "services-atomicserver")] + registry.register_builder::(); + #[cfg(feature = "services-alluxio")] + registry.register_builder::(); + #[cfg(feature = "services-azblob")] + registry.register_builder::(); + #[cfg(feature = "services-azdls")] + registry.register_builder::(); + #[cfg(feature = "services-azfile")] + registry.register_builder::(); + #[cfg(feature = "services-b2")] + registry.register_builder::(); + #[cfg(feature = "services-cacache")] + registry.register_builder::(); + #[cfg(feature = "services-cos")] + registry.register_builder::(); + #[cfg(feature = "services-compfs")] + registry.register_builder::(); + #[cfg(feature = "services-dashmap")] + registry.register_builder::(); + #[cfg(feature = "services-dropbox")] + registry.register_builder::(); + #[cfg(feature = "services-etcd")] + registry.register_builder::(); + #[cfg(feature = "services-foundationdb")] + registry.register_builder::(); + #[cfg(feature = "services-fs")] + registry.register_builder::(); + #[cfg(feature = "services-ftp")] + registry.register_builder::(); + #[cfg(feature = "services-gcs")] + registry.register_builder::(); + #[cfg(feature = "services-ghac")] + registry.register_builder::(); + #[cfg(feature = "services-hdfs")] + registry.register_builder::(); + #[cfg(feature = "services-http")] registry.register_builder::(); + #[cfg(feature = "services-huggingface")] + registry.register_builder::(); + #[cfg(feature = "services-ipfs")] + registry.register_builder::(); + #[cfg(feature = "services-ipmfs")] + registry.register_builder::(); + #[cfg(feature = "services-icloud")] + registry.register_builder::(); + #[cfg(feature = "services-libsql")] + registry.register_builder::(); + #[cfg(feature = "services-memcached")] + registry.register_builder::(); + #[cfg(feature = "services-memory")] + registry.register_builder::(); + #[cfg(feature = "services-mini-moka")] + registry.register_builder::(); + #[cfg(feature = "services-moka")] + registry.register_builder::(); + #[cfg(feature = "services-monoiofs")] + registry.register_builder::(); + #[cfg(feature = "services-mysql")] + registry.register_builder::(); + #[cfg(feature = "services-obs")] + registry.register_builder::(); + #[cfg(feature = "services-onedrive")] + registry.register_builder::(); + #[cfg(feature = "services-postgresql")] + registry.register_builder::(); + #[cfg(feature = "services-gdrive")] + registry.register_builder::(); + #[cfg(feature = "services-oss")] + registry.register_builder::(); + #[cfg(feature = "services-persy")] + registry.register_builder::(); + #[cfg(feature = "services-redis")] + registry.register_builder::(); + #[cfg(feature = "services-rocksdb")] + registry.register_builder::(); + #[cfg(feature = "services-s3")] + registry.register_builder::(); + #[cfg(feature = "services-seafile")] + registry.register_builder::(); + #[cfg(feature = "services-upyun")] + registry.register_builder::(); + #[cfg(feature = "services-yandex-disk")] + registry.register_builder::(); + #[cfg(feature = "services-pcloud")] + registry.register_builder::(); + #[cfg(feature = "services-sftp")] + registry.register_builder::(); + #[cfg(feature = "services-sled")] + registry.register_builder::(); + #[cfg(feature = "services-sqlite")] + registry.register_builder::(); + #[cfg(feature = "services-supabase")] + registry.register_builder::(); + #[cfg(feature = "services-swift")] + registry.register_builder::(); + #[cfg(feature = "services-tikv")] + registry.register_builder::(); + #[cfg(feature = "services-vercel-artifacts")] + registry.register_builder::(); + #[cfg(feature = "services-vercel-blob")] + registry.register_builder::(); + #[cfg(feature = "services-webdav")] + registry.register_builder::(); + #[cfg(feature = "services-webhdfs")] + registry.register_builder::(); + #[cfg(feature = "services-redb")] + registry.register_builder::(); + #[cfg(feature = "services-mongodb")] + registry.register_builder::(); + #[cfg(feature = "services-hdfs-native")] + registry.register_builder::(); + #[cfg(feature = "services-surrealdb")] + registry.register_builder::(); + #[cfg(feature = "services-lakefs")] + registry.register_builder::(); + #[cfg(feature = "services-nebula-graph")] + registry.register_builder::(); + registry } @@ -77,6 +204,6 @@ impl OperatorRegistry { fn operator_factory_from_configurator() -> OperatorFactory { |uri, options| { let builder = C::from_uri(uri, options)?.into_builder(); - Operator::new(builder).map(OperatorBuilder::finish) + Ok(Operator::new(builder)?.finish()) } } diff --git a/core/src/types/scheme.rs b/core/src/types/scheme.rs index c0da5219b829..f70f6e1236ef 100644 --- a/core/src/types/scheme.rs +++ b/core/src/types/scheme.rs @@ -22,6 +22,8 @@ use std::str::FromStr; use crate::Error; +use super::Builder; + /// Services that OpenDAL supports /// /// # Notes From b8c4fb144783629a54cefa95e6fb71e6840e9833 Mon Sep 17 00:00:00 2001 From: Jorge Hermo Date: Mon, 30 Dec 2024 19:42:20 +0100 Subject: [PATCH 04/23] feat: glue together Operator and global OperatorRegistry --- core/src/types/operator/builder.rs | 10 ++++++++++ core/src/types/operator/registry.rs | 21 +++++++++++++++++---- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/core/src/types/operator/builder.rs b/core/src/types/operator/builder.rs index 4393cd5e0206..233953140973 100644 --- a/core/src/types/operator/builder.rs +++ b/core/src/types/operator/builder.rs @@ -22,6 +22,8 @@ use crate::layers::*; use crate::raw::*; use crate::*; +use super::registry::GLOBAL_OPERATOR_REGISTRY; + /// # Operator build API /// /// Operator should be built via [`OperatorBuilder`]. We recommend to use [`Operator::new`] to get started: @@ -95,6 +97,14 @@ impl Operator { Ok(OperatorBuilder::new(acc)) } + /// TODO: document this. + pub fn from_uri( + uri: &str, + options: impl IntoIterator, + ) -> Result { + GLOBAL_OPERATOR_REGISTRY.with(|registry| registry.parse(uri, options)) + } + /// Create a new operator from given iterator in static dispatch. /// /// # Notes diff --git a/core/src/types/operator/registry.rs b/core/src/types/operator/registry.rs index f24488ed12a9..2825e406f9f4 100644 --- a/core/src/types/operator/registry.rs +++ b/core/src/types/operator/registry.rs @@ -4,10 +4,9 @@ use std::collections::HashMap; use crate::services::*; use crate::*; -// TODO: thread local or use LazyLock instead? +// TODO: thread local or use LazyLock instead? this way the access is lock-free thread_local! { -pub static GLOBAL_REGISTRY: LazyCell = - LazyCell::new(|| OperatorRegistry::with_enabled_services()); + pub static GLOBAL_OPERATOR_REGISTRY: LazyCell = LazyCell::new(|| OperatorRegistry::with_enabled_services()); } // In order to reduce boilerplate, we should return in this function a `Builder` instead of operator?. @@ -58,6 +57,7 @@ impl OperatorRegistry { })?; // TODO: `OperatorFactory` should receive `IntoIterator` instead of `HashMap`? + // however, impl Traits in type aliases is unstable and also are not allowed in fn pointers let options = options.into_iter().collect(); // TODO: `OperatorFactory` should use `&str` instead of `String`? we are cloning it anyway @@ -68,7 +68,20 @@ impl OperatorRegistry { let mut registry = Self::new(); // TODO: is this correct? have a `Builder::enabled()` method that returns the set of enabled services builders? // Similar to `Scheme::Enabled()` - // or have an `Scheme::associated_builder` that given a scheme returns the associated builder? + // or have an `Scheme::associated_builder` that given a scheme returns the associated builder? The problem with this + // is that `Scheme` variants are not gate behind a feature gate and the associated builder is. As a workaround + + // TODO: it seems too error-prone to have this list manually updated, we should have a macro that generates this list? + // it could be something like: + // + // ```rust + // apply_for_all_services!{ + // registry.register_builder::<$service>(); + // } + // ``` + // and the apply_for_all_services macro would gate every statement behind the corresponding feature gate + // This seems to not be the place where we should have a "list of enabled services". + // Ther is something similar with `Scheme::enabled()` #[cfg(feature = "services-aliyun-drive")] registry.register_builder::(); #[cfg(feature = "services-atomicserver")] From a50c0582abe2d024e7627eab2e3bc8bb16db3da2 Mon Sep 17 00:00:00 2001 From: Jorge Hermo Date: Mon, 30 Dec 2024 19:49:00 +0100 Subject: [PATCH 05/23] feat: glue together Operator and global OperatorRegistry --- core/src/types/operator/builder.rs | 2 +- core/src/types/operator/mod.rs | 2 +- core/src/types/operator/registry.rs | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/core/src/types/operator/builder.rs b/core/src/types/operator/builder.rs index 233953140973..bad3917bc174 100644 --- a/core/src/types/operator/builder.rs +++ b/core/src/types/operator/builder.rs @@ -21,7 +21,7 @@ use std::sync::Arc; use crate::layers::*; use crate::raw::*; use crate::*; - +// TODO: is this impor path idiomatic to the project? use super::registry::GLOBAL_OPERATOR_REGISTRY; /// # Operator build API diff --git a/core/src/types/operator/mod.rs b/core/src/types/operator/mod.rs index 5c7d1f48f1c2..d28325386ce0 100644 --- a/core/src/types/operator/mod.rs +++ b/core/src/types/operator/mod.rs @@ -35,7 +35,7 @@ pub mod operator_futures; // TODO: should we make the registry module public or export the OperatorFactory and OperatorRegistry // types directly? - mod registry; +// TODO: warning as not used. How can we expose them as public api? pub use registry::OperatorFactory; pub use registry::OperatorRegistry; diff --git a/core/src/types/operator/registry.rs b/core/src/types/operator/registry.rs index 2825e406f9f4..d0d70516f551 100644 --- a/core/src/types/operator/registry.rs +++ b/core/src/types/operator/registry.rs @@ -5,6 +5,7 @@ use crate::services::*; use crate::*; // TODO: thread local or use LazyLock instead? this way the access is lock-free +// TODO: should we expose the `GLOBAL_OPERATOR_REGISTRY` as public API at `crate::types::operator::GLOBAL_OPERATOR_REGISTRY`? thread_local! { pub static GLOBAL_OPERATOR_REGISTRY: LazyCell = LazyCell::new(|| OperatorRegistry::with_enabled_services()); } From 539e6ea1d5c9af7eaaf3e9693ba4c52e2a952ad7 Mon Sep 17 00:00:00 2001 From: Jorge Hermo Date: Mon, 30 Dec 2024 20:25:28 +0100 Subject: [PATCH 06/23] feat: implement Configurator::from_uri --- core/Cargo.lock | 273 +++++++++++++++++++++++++++- core/Cargo.toml | 3 + core/src/types/builder.rs | 44 ++++- core/src/types/operator/registry.rs | 4 +- 4 files changed, 313 insertions(+), 11 deletions(-) diff --git a/core/Cargo.lock b/core/Cargo.lock index 28417ce9e28b..e4b60409e368 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -2549,6 +2549,17 @@ dependencies = [ "winapi", ] +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", +] + [[package]] name = "dlv-list" version = "0.5.2" @@ -3886,6 +3897,124 @@ dependencies = [ "cc", ] +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", +] + [[package]] name = "ident_case" version = "1.0.1" @@ -3904,12 +4033,23 @@ dependencies = [ [[package]] name = "idna" -version = "0.5.0" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" dependencies = [ - "unicode-bidi", - "unicode-normalization", + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +dependencies = [ + "icu_normalizer", + "icu_properties", ] [[package]] @@ -4309,6 +4449,12 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +[[package]] +name = "litemap" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" + [[package]] name = "lock_api" version = "0.4.12" @@ -5067,6 +5213,7 @@ dependencies = [ "tracing", "tracing-opentelemetry", "tracing-subscriber", + "url", "uuid", ] @@ -8111,6 +8258,17 @@ dependencies = [ "futures-core", ] +[[package]] +name = "synstructure" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", +] + [[package]] name = "tagptr" version = "0.2.0" @@ -8306,6 +8464,16 @@ dependencies = [ "crunchy", ] +[[package]] +name = "tinystr" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +dependencies = [ + "displaydoc", + "zerovec", +] + [[package]] name = "tinytemplate" version = "1.2.1" @@ -8894,12 +9062,12 @@ dependencies = [ [[package]] name = "url" -version = "2.5.2" +version = "2.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" +checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" dependencies = [ "form_urlencoded", - "idna 0.5.0", + "idna 1.0.3", "percent-encoding", ] @@ -8915,6 +9083,18 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "utf8parse" version = "0.2.2" @@ -9517,6 +9697,18 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + [[package]] name = "ws_stream_wasm" version = "0.7.4" @@ -9569,6 +9761,30 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" +[[package]] +name = "yoke" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", + "synstructure", +] + [[package]] name = "zerocopy" version = "0.7.35" @@ -9590,12 +9806,55 @@ dependencies = [ "syn 2.0.90", ] +[[package]] +name = "zerofrom" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", + "synstructure", +] + [[package]] name = "zeroize" version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" +[[package]] +name = "zerovec" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", +] + [[package]] name = "zigzag" version = "0.1.0" diff --git a/core/Cargo.toml b/core/Cargo.toml index d404845dd3e2..cf4728709a6a 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -255,6 +255,9 @@ reqwest = { version = "0.12.2", features = [ serde = { version = "1", features = ["derive"] } serde_json = "1" tokio = { version = "1.27", features = ["sync", "io-util"] } +# TODO: I added this dependency in order to not re-implement the Url::query_pairs function. +# If its okay to do it (we are using that crate in the ofs binary https://github.com/apache/opendal/blob/main/bin/ofs/Cargo.toml#L45 +url = "2.5.4" uuid = { version = "1", features = ["serde", "v4"] } # Test only dependencies diff --git a/core/src/types/builder.rs b/core/src/types/builder.rs index bda8fd92ab96..1ae9a993f1e1 100644 --- a/core/src/types/builder.rs +++ b/core/src/types/builder.rs @@ -137,9 +137,49 @@ pub trait Configurator: Serialize + DeserializeOwned + Debug + 'static { }) } - // TODO: document this. + // TODO: should we split `from_uri` into two functions? `from_uri` and `from_uri_opts`? + // So we can have: + // ```rust + // fn from_uri(uri: &str) -> Result {...} + // fn from_uri_opts(uri: &str, options: impl IntoIterator) -> Result {...} + //```? + // This way, we can reduce the boilerplate of passing an empty iterator and + // `let op = Operator::from_uri("fs:///tmp/test", vec![])?;` + // becomes `let op = Operator::from_uri("fs:///tmp/test")?;` which is simpler. + + /// TODO: document this. fn from_uri(uri: &str, options: impl IntoIterator) -> Result { - todo!() + // TODO: We are using the `url::Url` struct instead of `http:Uri`, because `http::Uri` does not implement + // the `query_pairs` method, which we need to extract the options from the uri. + // `http::Uri` has the `query` method (https://docs.rs/http/latest/http/uri/struct.Uri.html#method.query) + // but it outputs the raw query string. Otherwise, we would need to implement the parsing of the query string + // ourselves. + + // TODO: we are using `url::Url`, not an URI. Should we rename this method to `from_url`? (all of the rest of the PR) + // The rfc stated that we would use URIs and not URL, but I wonder if we should refer to just URL instead. + // See the goal section of https://url.spec.whatwg.org/ (the `url` crate implements that spec). + // There they say that the term URI is confusing and that URL + // should be used instead. The `url::Url` crate supports URL fragments, so it should be enough for our use case. + let parsed_url = url::Url::parse(uri).map_err(|err| { + Error::new(ErrorKind::ConfigInvalid, "uri is invalid") + .with_context("uri", uri) + .set_source(err) + })?; + // TODO: I have some doubts about this + // It was inspired from https://github.com/apache/opendal/blob/52c96bb8e8cb4d024ccab1f415c4756447c726dd/bin/ofs/src/main.rs#L60 + // Parameters should be specified in uri's query param. Example: 'fs://?root=' + // this is very similar to https://github.com/apache/opendal/blob/52c96bb8e8cb4d024ccab1f415c4756447c726dd/bin/ofs/README.md?plain=1#L45 + let url_options = parsed_url.query_pairs().into_owned(); + + // TODO: we are not interpreting the host or path params + // the `let op = Operator::from_uri("fs:///tmp/test", vec![])?;` statement from the RFC wont work. + // instead we should use `let op = Operator::from_uri("fs://?root=/tmp/test", vec![])?;` as done + // in `ofs` + + // TODO: should we merge it this way? + let merged_options = url_options.into_iter().chain(options); + + Self::from_iter(merged_options) } /// Convert this configuration into a service builder. diff --git a/core/src/types/operator/registry.rs b/core/src/types/operator/registry.rs index d0d70516f551..466383c66fd4 100644 --- a/core/src/types/operator/registry.rs +++ b/core/src/types/operator/registry.rs @@ -44,11 +44,11 @@ impl OperatorRegistry { .set_source(err) })?; - let scheme = parsed_uri.scheme().ok_or_else(|| { + let scheme = parsed_uri.scheme_str().ok_or_else(|| { Error::new(ErrorKind::ConfigInvalid, "uri is missing scheme").with_context("uri", uri) })?; - let factory = self.registry.get(scheme.as_str()).ok_or_else(|| { + let factory = self.registry.get(scheme).ok_or_else(|| { Error::new( ErrorKind::ConfigInvalid, "could not find any operator factory for the given scheme", From 3aba98b27bd3bfd2cab95dae32371e8779d9d70d Mon Sep 17 00:00:00 2001 From: Jorge Hermo Date: Mon, 30 Dec 2024 20:35:05 +0100 Subject: [PATCH 07/23] test: add small doctest to Operator::from_uri --- core/src/types/builder.rs | 2 +- core/src/types/operator/builder.rs | 8 ++++++++ core/src/types/operator/registry.rs | 11 +++++++---- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/core/src/types/builder.rs b/core/src/types/builder.rs index 1ae9a993f1e1..cc2a4895286b 100644 --- a/core/src/types/builder.rs +++ b/core/src/types/builder.rs @@ -174,7 +174,7 @@ pub trait Configurator: Serialize + DeserializeOwned + Debug + 'static { // TODO: we are not interpreting the host or path params // the `let op = Operator::from_uri("fs:///tmp/test", vec![])?;` statement from the RFC wont work. // instead we should use `let op = Operator::from_uri("fs://?root=/tmp/test", vec![])?;` as done - // in `ofs` + // in `ofs`. The `fs` service should override this default implementation if it wants to use the host or path params? // TODO: should we merge it this way? let merged_options = url_options.into_iter().chain(options); diff --git a/core/src/types/operator/builder.rs b/core/src/types/operator/builder.rs index bad3917bc174..21b98be9e87a 100644 --- a/core/src/types/operator/builder.rs +++ b/core/src/types/operator/builder.rs @@ -98,6 +98,14 @@ impl Operator { } /// TODO: document this. + /// + /// TODO: improve those examples + /// # Examples + /// ``` + /// let op = Operator::from_uri("fs://?root=/tmp/test", vec![])? + /// + /// Ok(()) + /// ``` pub fn from_uri( uri: &str, options: impl IntoIterator, diff --git a/core/src/types/operator/registry.rs b/core/src/types/operator/registry.rs index 466383c66fd4..a1adb6beb530 100644 --- a/core/src/types/operator/registry.rs +++ b/core/src/types/operator/registry.rs @@ -38,15 +38,18 @@ impl OperatorRegistry { uri: &str, options: impl IntoIterator, ) -> Result { - let parsed_uri = http::Uri::try_from(uri).map_err(|err| { + // TODO: we use the `url::Url` struct instead of `http:Uri`, because + // we needed it in `Configurator::from_uri` method. + let parsed_url = url::Url::parse(uri).map_err(|err| { Error::new(ErrorKind::ConfigInvalid, "uri is invalid") .with_context("uri", uri) .set_source(err) })?; - let scheme = parsed_uri.scheme_str().ok_or_else(|| { - Error::new(ErrorKind::ConfigInvalid, "uri is missing scheme").with_context("uri", uri) - })?; + // TODO: with the `url::Url` struct, we always have the scheme (it is not an Option) + // but with the `http::Uri` crate, it can be missing https://docs.rs/http/latest/http/uri/struct.Uri.html#method.scheme + // which one should we use? + let scheme = parsed_url.scheme(); let factory = self.registry.get(scheme).ok_or_else(|| { Error::new( From 5979898e9acd697bda1d5404a86333e683f4e7e7 Mon Sep 17 00:00:00 2001 From: Jorge Hermo Date: Mon, 30 Dec 2024 20:42:27 +0100 Subject: [PATCH 08/23] chore: add comment --- core/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/Cargo.toml b/core/Cargo.toml index cf4728709a6a..5597f339a309 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -256,7 +256,7 @@ serde = { version = "1", features = ["derive"] } serde_json = "1" tokio = { version = "1.27", features = ["sync", "io-util"] } # TODO: I added this dependency in order to not re-implement the Url::query_pairs function. -# If its okay to do it (we are using that crate in the ofs binary https://github.com/apache/opendal/blob/main/bin/ofs/Cargo.toml#L45 +# Is it okay to include it? (we are using that crate in the ofs binary https://github.com/apache/opendal/blob/main/bin/ofs/Cargo.toml#L45 url = "2.5.4" uuid = { version = "1", features = ["serde", "v4"] } From 6eed396b7e48abf42b7548f719ee987840df1bf4 Mon Sep 17 00:00:00 2001 From: Jorge Hermo Date: Mon, 30 Dec 2024 20:43:15 +0100 Subject: [PATCH 09/23] chore: remove changes --- core/src/services/mod.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/core/src/services/mod.rs b/core/src/services/mod.rs index 06456caf8d58..0437dff4a759 100644 --- a/core/src/services/mod.rs +++ b/core/src/services/mod.rs @@ -19,18 +19,13 @@ //! //! More ongoing services support is tracked at [opendal#5](https://github.com/apache/opendal/issues/5). Please feel free to submit issues if there are services not covered. -use crate::Builder; -use std::collections::HashSet; - mod aliyun_drive; - pub use aliyun_drive::*; mod alluxio; pub use alluxio::*; mod atomicserver; - pub use self::atomicserver::*; mod azblob; From 0e582ee0c41758fb4630ff4499935e4c899742c6 Mon Sep 17 00:00:00 2001 From: Jorge Hermo Date: Mon, 30 Dec 2024 20:54:30 +0100 Subject: [PATCH 10/23] chore: remove changes --- core/src/types/scheme.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/core/src/types/scheme.rs b/core/src/types/scheme.rs index f70f6e1236ef..c0da5219b829 100644 --- a/core/src/types/scheme.rs +++ b/core/src/types/scheme.rs @@ -22,8 +22,6 @@ use std::str::FromStr; use crate::Error; -use super::Builder; - /// Services that OpenDAL supports /// /// # Notes From 9d730c0d539e682de0cc6966adb89e32af9afdd3 Mon Sep 17 00:00:00 2001 From: Jorge Hermo Date: Mon, 30 Dec 2024 21:03:57 +0100 Subject: [PATCH 11/23] chore: add license header --- core/src/types/operator/builder.rs | 8 ++++++-- core/src/types/operator/registry.rs | 17 +++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/core/src/types/operator/builder.rs b/core/src/types/operator/builder.rs index 21b98be9e87a..012a903f81fe 100644 --- a/core/src/types/operator/builder.rs +++ b/core/src/types/operator/builder.rs @@ -102,9 +102,13 @@ impl Operator { /// TODO: improve those examples /// # Examples /// ``` - /// let op = Operator::from_uri("fs://?root=/tmp/test", vec![])? + /// # use anyhow::Result; + /// use opendal::Operator; /// - /// Ok(()) + /// fn test() -> Result<()> { + /// let op = Operator::from_uri("fs://?root=/tmp/test", vec![])?; + /// Ok(()) + /// } /// ``` pub fn from_uri( uri: &str, diff --git a/core/src/types/operator/registry.rs b/core/src/types/operator/registry.rs index a1adb6beb530..aa57bdc59fe8 100644 --- a/core/src/types/operator/registry.rs +++ b/core/src/types/operator/registry.rs @@ -1,3 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + use std::cell::LazyCell; use std::collections::HashMap; From 976e60c281ed01008fb50264e343ca3b301e2421 Mon Sep 17 00:00:00 2001 From: Jorge Hermo Date: Mon, 30 Dec 2024 21:04:35 +0100 Subject: [PATCH 12/23] chore: fix typo --- core/src/types/operator/registry.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/core/src/types/operator/registry.rs b/core/src/types/operator/registry.rs index aa57bdc59fe8..7810c271568d 100644 --- a/core/src/types/operator/registry.rs +++ b/core/src/types/operator/registry.rs @@ -102,7 +102,6 @@ impl OperatorRegistry { // ``` // and the apply_for_all_services macro would gate every statement behind the corresponding feature gate // This seems to not be the place where we should have a "list of enabled services". - // Ther is something similar with `Scheme::enabled()` #[cfg(feature = "services-aliyun-drive")] registry.register_builder::(); #[cfg(feature = "services-atomicserver")] From a4478a9c684b8c2ff8185d27bb7a37c716d0100f Mon Sep 17 00:00:00 2001 From: Jorge Hermo Date: Mon, 30 Dec 2024 21:10:04 +0100 Subject: [PATCH 13/23] fix: clippy lint --- core/src/types/operator/registry.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/types/operator/registry.rs b/core/src/types/operator/registry.rs index 7810c271568d..c43094e3c5e7 100644 --- a/core/src/types/operator/registry.rs +++ b/core/src/types/operator/registry.rs @@ -24,7 +24,7 @@ use crate::*; // TODO: thread local or use LazyLock instead? this way the access is lock-free // TODO: should we expose the `GLOBAL_OPERATOR_REGISTRY` as public API at `crate::types::operator::GLOBAL_OPERATOR_REGISTRY`? thread_local! { - pub static GLOBAL_OPERATOR_REGISTRY: LazyCell = LazyCell::new(|| OperatorRegistry::with_enabled_services()); + pub static GLOBAL_OPERATOR_REGISTRY: LazyCell = LazyCell::new(OperatorRegistry::with_enabled_services); } // In order to reduce boilerplate, we should return in this function a `Builder` instead of operator?. From a4e38661bfe49d89ffdf71a21163587b96466846 Mon Sep 17 00:00:00 2001 From: Jorge Hermo Date: Mon, 30 Dec 2024 21:18:52 +0100 Subject: [PATCH 14/23] retrigger ci From 443c11db2ff983024df35109bbe3267addf1ceee Mon Sep 17 00:00:00 2001 From: Jorge Hermo Date: Sun, 5 Jan 2025 11:25:15 +0100 Subject: [PATCH 15/23] feat: drop usage of the url crate --- core/Cargo.lock | 273 +--------------------------- core/Cargo.toml | 3 - core/src/raw/http_util/mod.rs | 1 + core/src/raw/http_util/uri.rs | 19 ++ core/src/types/builder.rs | 23 +-- core/src/types/operator/registry.rs | 12 +- 6 files changed, 41 insertions(+), 290 deletions(-) diff --git a/core/Cargo.lock b/core/Cargo.lock index e4b60409e368..28417ce9e28b 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -2549,17 +2549,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "displaydoc" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.90", -] - [[package]] name = "dlv-list" version = "0.5.2" @@ -3897,124 +3886,6 @@ dependencies = [ "cc", ] -[[package]] -name = "icu_collections" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" -dependencies = [ - "displaydoc", - "yoke", - "zerofrom", - "zerovec", -] - -[[package]] -name = "icu_locid" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" -dependencies = [ - "displaydoc", - "litemap", - "tinystr", - "writeable", - "zerovec", -] - -[[package]] -name = "icu_locid_transform" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" -dependencies = [ - "displaydoc", - "icu_locid", - "icu_locid_transform_data", - "icu_provider", - "tinystr", - "zerovec", -] - -[[package]] -name = "icu_locid_transform_data" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" - -[[package]] -name = "icu_normalizer" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" -dependencies = [ - "displaydoc", - "icu_collections", - "icu_normalizer_data", - "icu_properties", - "icu_provider", - "smallvec", - "utf16_iter", - "utf8_iter", - "write16", - "zerovec", -] - -[[package]] -name = "icu_normalizer_data" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" - -[[package]] -name = "icu_properties" -version = "1.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" -dependencies = [ - "displaydoc", - "icu_collections", - "icu_locid_transform", - "icu_properties_data", - "icu_provider", - "tinystr", - "zerovec", -] - -[[package]] -name = "icu_properties_data" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" - -[[package]] -name = "icu_provider" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" -dependencies = [ - "displaydoc", - "icu_locid", - "icu_provider_macros", - "stable_deref_trait", - "tinystr", - "writeable", - "yoke", - "zerofrom", - "zerovec", -] - -[[package]] -name = "icu_provider_macros" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.90", -] - [[package]] name = "ident_case" version = "1.0.1" @@ -4033,23 +3904,12 @@ dependencies = [ [[package]] name = "idna" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" -dependencies = [ - "idna_adapter", - "smallvec", - "utf8_iter", -] - -[[package]] -name = "idna_adapter" -version = "1.2.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" dependencies = [ - "icu_normalizer", - "icu_properties", + "unicode-bidi", + "unicode-normalization", ] [[package]] @@ -4449,12 +4309,6 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" -[[package]] -name = "litemap" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" - [[package]] name = "lock_api" version = "0.4.12" @@ -5213,7 +5067,6 @@ dependencies = [ "tracing", "tracing-opentelemetry", "tracing-subscriber", - "url", "uuid", ] @@ -8258,17 +8111,6 @@ dependencies = [ "futures-core", ] -[[package]] -name = "synstructure" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.90", -] - [[package]] name = "tagptr" version = "0.2.0" @@ -8464,16 +8306,6 @@ dependencies = [ "crunchy", ] -[[package]] -name = "tinystr" -version = "0.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" -dependencies = [ - "displaydoc", - "zerovec", -] - [[package]] name = "tinytemplate" version = "1.2.1" @@ -9062,12 +8894,12 @@ dependencies = [ [[package]] name = "url" -version = "2.5.4" +version = "2.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" +checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" dependencies = [ "form_urlencoded", - "idna 1.0.3", + "idna 0.5.0", "percent-encoding", ] @@ -9083,18 +8915,6 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" -[[package]] -name = "utf16_iter" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" - -[[package]] -name = "utf8_iter" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" - [[package]] name = "utf8parse" version = "0.2.2" @@ -9697,18 +9517,6 @@ dependencies = [ "windows-sys 0.48.0", ] -[[package]] -name = "write16" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" - -[[package]] -name = "writeable" -version = "0.5.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" - [[package]] name = "ws_stream_wasm" version = "0.7.4" @@ -9761,30 +9569,6 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" -[[package]] -name = "yoke" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" -dependencies = [ - "serde", - "stable_deref_trait", - "yoke-derive", - "zerofrom", -] - -[[package]] -name = "yoke-derive" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.90", - "synstructure", -] - [[package]] name = "zerocopy" version = "0.7.35" @@ -9806,55 +9590,12 @@ dependencies = [ "syn 2.0.90", ] -[[package]] -name = "zerofrom" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e" -dependencies = [ - "zerofrom-derive", -] - -[[package]] -name = "zerofrom-derive" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.90", - "synstructure", -] - [[package]] name = "zeroize" version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" -[[package]] -name = "zerovec" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" -dependencies = [ - "yoke", - "zerofrom", - "zerovec-derive", -] - -[[package]] -name = "zerovec-derive" -version = "0.10.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.90", -] - [[package]] name = "zigzag" version = "0.1.0" diff --git a/core/Cargo.toml b/core/Cargo.toml index 5597f339a309..d404845dd3e2 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -255,9 +255,6 @@ reqwest = { version = "0.12.2", features = [ serde = { version = "1", features = ["derive"] } serde_json = "1" tokio = { version = "1.27", features = ["sync", "io-util"] } -# TODO: I added this dependency in order to not re-implement the Url::query_pairs function. -# Is it okay to include it? (we are using that crate in the ofs binary https://github.com/apache/opendal/blob/main/bin/ofs/Cargo.toml#L45 -url = "2.5.4" uuid = { version = "1", features = ["serde", "v4"] } # Test only dependencies diff --git a/core/src/raw/http_util/mod.rs b/core/src/raw/http_util/mod.rs index c90b1e485845..6fa39f9f68b0 100644 --- a/core/src/raw/http_util/mod.rs +++ b/core/src/raw/http_util/mod.rs @@ -55,6 +55,7 @@ pub use header::parse_prefixed_headers; mod uri; pub use uri::percent_decode_path; pub use uri::percent_encode_path; +pub use uri::query_pairs; mod error; pub use error::new_request_build_error; diff --git a/core/src/raw/http_util/uri.rs b/core/src/raw/http_util/uri.rs index 1f3b893e035e..1339494f8fb3 100644 --- a/core/src/raw/http_util/uri.rs +++ b/core/src/raw/http_util/uri.rs @@ -58,6 +58,25 @@ pub fn percent_decode_path(path: &str) -> String { } } +/// query_pairs will parse a query string encoded as key-value pairs separated by `&` to a vector of key-value pairs. +/// It also does percent decoding for both key and value. +/// +/// Note that `?` is not allowed in the query string, and it will be treated as a part of the first key if included. +pub fn query_pairs(query: &str) -> Vec<(String, String)> { + query + .split('&') + .filter_map(|pair| { + let mut iter = pair.splitn(2, '='); + // TODO: should we silently ignore invalid pairs and filter them out without the user noticing? + // or should we return an error in the whole `query_pairs` function so the caller knows it failed? + let key = iter.next()?; + let value = iter.next().unwrap_or(""); + Some((key, value)) + }) + .map(|(key, value)| (percent_decode_path(key), percent_decode_path(value))) + .collect() +} + #[cfg(test)] mod tests { use super::*; diff --git a/core/src/types/builder.rs b/core/src/types/builder.rs index cc2a4895286b..6f65c50d6446 100644 --- a/core/src/types/builder.rs +++ b/core/src/types/builder.rs @@ -17,6 +17,7 @@ use std::fmt::Debug; +use http::Uri; use serde::de::DeserializeOwned; use serde::Serialize; @@ -149,27 +150,19 @@ pub trait Configurator: Serialize + DeserializeOwned + Debug + 'static { /// TODO: document this. fn from_uri(uri: &str, options: impl IntoIterator) -> Result { - // TODO: We are using the `url::Url` struct instead of `http:Uri`, because `http::Uri` does not implement - // the `query_pairs` method, which we need to extract the options from the uri. - // `http::Uri` has the `query` method (https://docs.rs/http/latest/http/uri/struct.Uri.html#method.query) - // but it outputs the raw query string. Otherwise, we would need to implement the parsing of the query string - // ourselves. - - // TODO: we are using `url::Url`, not an URI. Should we rename this method to `from_url`? (all of the rest of the PR) - // The rfc stated that we would use URIs and not URL, but I wonder if we should refer to just URL instead. - // See the goal section of https://url.spec.whatwg.org/ (the `url` crate implements that spec). - // There they say that the term URI is confusing and that URL - // should be used instead. The `url::Url` crate supports URL fragments, so it should be enough for our use case. - let parsed_url = url::Url::parse(uri).map_err(|err| { + let parsed_uri = uri.parse::().map_err(|err| { Error::new(ErrorKind::ConfigInvalid, "uri is invalid") .with_context("uri", uri) .set_source(err) })?; - // TODO: I have some doubts about this + + // TODO: I have some doubts about this default implementation // It was inspired from https://github.com/apache/opendal/blob/52c96bb8e8cb4d024ccab1f415c4756447c726dd/bin/ofs/src/main.rs#L60 // Parameters should be specified in uri's query param. Example: 'fs://?root=' // this is very similar to https://github.com/apache/opendal/blob/52c96bb8e8cb4d024ccab1f415c4756447c726dd/bin/ofs/README.md?plain=1#L45 - let url_options = parsed_url.query_pairs().into_owned(); + let query_pairs = parsed_uri.query().map(query_pairs).unwrap_or_default(); + + // TODO: should we log a warning if the query_pairs vector is empty? // TODO: we are not interpreting the host or path params // the `let op = Operator::from_uri("fs:///tmp/test", vec![])?;` statement from the RFC wont work. @@ -177,7 +170,7 @@ pub trait Configurator: Serialize + DeserializeOwned + Debug + 'static { // in `ofs`. The `fs` service should override this default implementation if it wants to use the host or path params? // TODO: should we merge it this way? - let merged_options = url_options.into_iter().chain(options); + let merged_options = query_pairs.into_iter().chain(options); Self::from_iter(merged_options) } diff --git a/core/src/types/operator/registry.rs b/core/src/types/operator/registry.rs index c43094e3c5e7..f85257a6a10d 100644 --- a/core/src/types/operator/registry.rs +++ b/core/src/types/operator/registry.rs @@ -18,6 +18,8 @@ use std::cell::LazyCell; use std::collections::HashMap; +use http::Uri; + use crate::services::*; use crate::*; @@ -57,16 +59,15 @@ impl OperatorRegistry { ) -> Result { // TODO: we use the `url::Url` struct instead of `http:Uri`, because // we needed it in `Configurator::from_uri` method. - let parsed_url = url::Url::parse(uri).map_err(|err| { + let parsed_uri = uri.parse::().map_err(|err| { Error::new(ErrorKind::ConfigInvalid, "uri is invalid") .with_context("uri", uri) .set_source(err) })?; - // TODO: with the `url::Url` struct, we always have the scheme (it is not an Option) - // but with the `http::Uri` crate, it can be missing https://docs.rs/http/latest/http/uri/struct.Uri.html#method.scheme - // which one should we use? - let scheme = parsed_url.scheme(); + let scheme = parsed_uri.scheme_str().ok_or_else(|| { + Error::new(ErrorKind::ConfigInvalid, "uri is missing scheme").with_context("uri", uri) + })?; let factory = self.registry.get(scheme).ok_or_else(|| { Error::new( @@ -81,7 +82,6 @@ impl OperatorRegistry { // however, impl Traits in type aliases is unstable and also are not allowed in fn pointers let options = options.into_iter().collect(); - // TODO: `OperatorFactory` should use `&str` instead of `String`? we are cloning it anyway factory(uri, options) } From e7215b42e70a9f978ce338d3b604bd3e9a16db61 Mon Sep 17 00:00:00 2001 From: Jorge Hermo Date: Sun, 5 Jan 2025 11:25:50 +0100 Subject: [PATCH 16/23] chore: remove TODO --- core/src/types/builder.rs | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/core/src/types/builder.rs b/core/src/types/builder.rs index 6f65c50d6446..8737c1c43eb0 100644 --- a/core/src/types/builder.rs +++ b/core/src/types/builder.rs @@ -138,16 +138,6 @@ pub trait Configurator: Serialize + DeserializeOwned + Debug + 'static { }) } - // TODO: should we split `from_uri` into two functions? `from_uri` and `from_uri_opts`? - // So we can have: - // ```rust - // fn from_uri(uri: &str) -> Result {...} - // fn from_uri_opts(uri: &str, options: impl IntoIterator) -> Result {...} - //```? - // This way, we can reduce the boilerplate of passing an empty iterator and - // `let op = Operator::from_uri("fs:///tmp/test", vec![])?;` - // becomes `let op = Operator::from_uri("fs:///tmp/test")?;` which is simpler. - /// TODO: document this. fn from_uri(uri: &str, options: impl IntoIterator) -> Result { let parsed_uri = uri.parse::().map_err(|err| { From b6dd1dc1c354c8b7c843bfebd358780626ee0d46 Mon Sep 17 00:00:00 2001 From: Jorge Hermo Date: Sun, 5 Jan 2025 11:30:01 +0100 Subject: [PATCH 17/23] feat: wrap OperatorRegistry in Arc Mutex --- core/src/types/operator/registry.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/core/src/types/operator/registry.rs b/core/src/types/operator/registry.rs index f85257a6a10d..a7f1c4410304 100644 --- a/core/src/types/operator/registry.rs +++ b/core/src/types/operator/registry.rs @@ -17,6 +17,7 @@ use std::cell::LazyCell; use std::collections::HashMap; +use std::sync::{Arc, Mutex}; use http::Uri; @@ -35,21 +36,24 @@ pub type OperatorFactory = fn(&str, HashMap) -> Result // TODO: create an static registry? or a global() method of OperatorRegistry that lazily initializes the registry? // Register only services in `Scheme::enabled()` +#[derive(Clone, Debug)] pub struct OperatorRegistry { - // TODO: add Arc> to make it cheap to clone + thread safe? or is it not needed? - registry: HashMap, + registry: Arc>>, } impl OperatorRegistry { pub fn new() -> Self { Self { - registry: HashMap::new(), + registry: Default::default(), } } pub fn register(&mut self, scheme: &str, factory: OperatorFactory) { // TODO: should we receive a `&str` or a `String`? we are cloning it anyway - self.registry.insert(scheme.to_string(), factory); + self.registry + .lock() + .expect("poisoned lock") + .insert(scheme.to_string(), factory); } pub fn parse( From e887d24621f0a00bf02449374d6eda61fa4bf052 Mon Sep 17 00:00:00 2001 From: Jorge Hermo Date: Sun, 5 Jan 2025 11:32:56 +0100 Subject: [PATCH 18/23] feat: Initialize operator registry in new method --- core/src/types/operator/registry.rs | 95 ++++++++++++++--------------- 1 file changed, 45 insertions(+), 50 deletions(-) diff --git a/core/src/types/operator/registry.rs b/core/src/types/operator/registry.rs index a7f1c4410304..7e02b522db59 100644 --- a/core/src/types/operator/registry.rs +++ b/core/src/types/operator/registry.rs @@ -27,7 +27,7 @@ use crate::*; // TODO: thread local or use LazyLock instead? this way the access is lock-free // TODO: should we expose the `GLOBAL_OPERATOR_REGISTRY` as public API at `crate::types::operator::GLOBAL_OPERATOR_REGISTRY`? thread_local! { - pub static GLOBAL_OPERATOR_REGISTRY: LazyCell = LazyCell::new(OperatorRegistry::with_enabled_services); + pub static GLOBAL_OPERATOR_REGISTRY: LazyCell = LazyCell::new(OperatorRegistry::new); } // In order to reduce boilerplate, we should return in this function a `Builder` instead of operator?. @@ -36,61 +36,14 @@ pub type OperatorFactory = fn(&str, HashMap) -> Result // TODO: create an static registry? or a global() method of OperatorRegistry that lazily initializes the registry? // Register only services in `Scheme::enabled()` -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Default)] pub struct OperatorRegistry { registry: Arc>>, } impl OperatorRegistry { pub fn new() -> Self { - Self { - registry: Default::default(), - } - } - - pub fn register(&mut self, scheme: &str, factory: OperatorFactory) { - // TODO: should we receive a `&str` or a `String`? we are cloning it anyway - self.registry - .lock() - .expect("poisoned lock") - .insert(scheme.to_string(), factory); - } - - pub fn parse( - &self, - uri: &str, - options: impl IntoIterator, - ) -> Result { - // TODO: we use the `url::Url` struct instead of `http:Uri`, because - // we needed it in `Configurator::from_uri` method. - let parsed_uri = uri.parse::().map_err(|err| { - Error::new(ErrorKind::ConfigInvalid, "uri is invalid") - .with_context("uri", uri) - .set_source(err) - })?; - - let scheme = parsed_uri.scheme_str().ok_or_else(|| { - Error::new(ErrorKind::ConfigInvalid, "uri is missing scheme").with_context("uri", uri) - })?; - - let factory = self.registry.get(scheme).ok_or_else(|| { - Error::new( - ErrorKind::ConfigInvalid, - "could not find any operator factory for the given scheme", - ) - .with_context("uri", uri) - .with_context("scheme", scheme) - })?; - - // TODO: `OperatorFactory` should receive `IntoIterator` instead of `HashMap`? - // however, impl Traits in type aliases is unstable and also are not allowed in fn pointers - let options = options.into_iter().collect(); - - factory(uri, options) - } - - pub fn with_enabled_services() -> Self { - let mut registry = Self::new(); + let mut registry = Self::default(); // TODO: is this correct? have a `Builder::enabled()` method that returns the set of enabled services builders? // Similar to `Scheme::Enabled()` // or have an `Scheme::associated_builder` that given a scheme returns the associated builder? The problem with this @@ -230,6 +183,48 @@ impl OperatorRegistry { registry } + pub fn register(&mut self, scheme: &str, factory: OperatorFactory) { + // TODO: should we receive a `&str` or a `String`? we are cloning it anyway + self.registry + .lock() + .expect("poisoned lock") + .insert(scheme.to_string(), factory); + } + + pub fn parse( + &self, + uri: &str, + options: impl IntoIterator, + ) -> Result { + // TODO: we use the `url::Url` struct instead of `http:Uri`, because + // we needed it in `Configurator::from_uri` method. + let parsed_uri = uri.parse::().map_err(|err| { + Error::new(ErrorKind::ConfigInvalid, "uri is invalid") + .with_context("uri", uri) + .set_source(err) + })?; + + let scheme = parsed_uri.scheme_str().ok_or_else(|| { + Error::new(ErrorKind::ConfigInvalid, "uri is missing scheme").with_context("uri", uri) + })?; + + let registry_lock = self.registry.lock().expect("poisoned lock"); + let factory = registry_lock.get(scheme).ok_or_else(|| { + Error::new( + ErrorKind::ConfigInvalid, + "could not find any operator factory for the given scheme", + ) + .with_context("uri", uri) + .with_context("scheme", scheme) + })?; + + // TODO: `OperatorFactory` should receive `IntoIterator` instead of `HashMap`? + // however, impl Traits in type aliases is unstable and also are not allowed in fn pointers + let options = options.into_iter().collect(); + + factory(uri, options) + } + fn register_builder(&mut self) { self.register( B::SCHEME.into_static(), From 41790a5decb9eaa3b2ea18d3c5b2962e99f8d985 Mon Sep 17 00:00:00 2001 From: Jorge Hermo Date: Sun, 5 Jan 2025 11:46:30 +0100 Subject: [PATCH 19/23] chore: remove todo --- core/src/types/builder.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/core/src/types/builder.rs b/core/src/types/builder.rs index 8737c1c43eb0..1d81f0175499 100644 --- a/core/src/types/builder.rs +++ b/core/src/types/builder.rs @@ -159,7 +159,6 @@ pub trait Configurator: Serialize + DeserializeOwned + Debug + 'static { // instead we should use `let op = Operator::from_uri("fs://?root=/tmp/test", vec![])?;` as done // in `ofs`. The `fs` service should override this default implementation if it wants to use the host or path params? - // TODO: should we merge it this way? let merged_options = query_pairs.into_iter().chain(options); Self::from_iter(merged_options) From 3b80f703c9dab2b8db32cd81cf3e5071af26c35b Mon Sep 17 00:00:00 2001 From: Jorge Hermo Date: Sun, 5 Jan 2025 12:46:30 +0100 Subject: [PATCH 20/23] feat: move service registration to builder methods --- core/src/types/builder.rs | 10 ++ core/src/types/operator/mod.rs | 1 + core/src/types/operator/registry.rs | 142 +++++++++++++--------------- 3 files changed, 75 insertions(+), 78 deletions(-) diff --git a/core/src/types/builder.rs b/core/src/types/builder.rs index 1d81f0175499..a57ea7152391 100644 --- a/core/src/types/builder.rs +++ b/core/src/types/builder.rs @@ -23,6 +23,7 @@ use serde::Serialize; use crate::raw::*; use crate::*; +use types::operator::{OperatorFactory, OperatorRegistry}; /// Builder is used to set up underlying services. /// @@ -57,6 +58,15 @@ pub trait Builder: Default + 'static { /// Consume the accessor builder to build a service. fn build(self) -> Result; + + /// Register this builder in the given registry. + fn register_in_registry(registry: &mut OperatorRegistry) { + let operator_factory: OperatorFactory = |uri, options| { + let builder = Self::Config::from_uri(uri, options)?.into_builder(); + Ok(Operator::new(builder)?.finish()) + }; + registry.register(Self::SCHEME.into_static(), operator_factory) + } } /// Dummy implementation of builder diff --git a/core/src/types/operator/mod.rs b/core/src/types/operator/mod.rs index d28325386ce0..5b2ea2f31158 100644 --- a/core/src/types/operator/mod.rs +++ b/core/src/types/operator/mod.rs @@ -39,3 +39,4 @@ mod registry; // TODO: warning as not used. How can we expose them as public api? pub use registry::OperatorFactory; pub use registry::OperatorRegistry; +pub use registry::GLOBAL_OPERATOR_REGISTRY; diff --git a/core/src/types/operator/registry.rs b/core/src/types/operator/registry.rs index 7e02b522db59..e14f1e195b6c 100644 --- a/core/src/types/operator/registry.rs +++ b/core/src/types/operator/registry.rs @@ -33,9 +33,9 @@ thread_local! { // In order to reduce boilerplate, we should return in this function a `Builder` instead of operator?. pub type OperatorFactory = fn(&str, HashMap) -> Result; -// TODO: create an static registry? or a global() method of OperatorRegistry that lazily initializes the registry? -// Register only services in `Scheme::enabled()` - +// TODO: the default implementation should return an empty registry? Or it shoudl return the initialized +// registry with all the services that are enabled? If so, should we include an `OperatorRegistry::empty` method +// that allows users to create an empty registry? #[derive(Clone, Debug, Default)] pub struct OperatorRegistry { registry: Arc>>, @@ -54,131 +54,131 @@ impl OperatorRegistry { // // ```rust // apply_for_all_services!{ - // registry.register_builder::<$service>(); + // $service::register_in_registry(&mut registry>(); // } // ``` // and the apply_for_all_services macro would gate every statement behind the corresponding feature gate // This seems to not be the place where we should have a "list of enabled services". #[cfg(feature = "services-aliyun-drive")] - registry.register_builder::(); + AliyunDrive::register_in_registry(&mut registry); #[cfg(feature = "services-atomicserver")] - registry.register_builder::(); + Atomicserver::register_in_registry(&mut registry); #[cfg(feature = "services-alluxio")] - registry.register_builder::(); + Alluxio::register_in_registry(&mut registry); #[cfg(feature = "services-azblob")] - registry.register_builder::(); + Azblob::register_in_registry(&mut registry); #[cfg(feature = "services-azdls")] - registry.register_builder::(); + Azdls::register_in_registry(&mut registry); #[cfg(feature = "services-azfile")] - registry.register_builder::(); + Azfile::register_in_registry(&mut registry); #[cfg(feature = "services-b2")] - registry.register_builder::(); + B2::register_in_registry(&mut registry); #[cfg(feature = "services-cacache")] - registry.register_builder::(); + Cacache::register_in_registry(&mut registry); #[cfg(feature = "services-cos")] - registry.register_builder::(); + Cos::register_in_registry(&mut registry); #[cfg(feature = "services-compfs")] - registry.register_builder::(); + Compfs::register_in_registry(&mut registry); #[cfg(feature = "services-dashmap")] - registry.register_builder::(); + Dashmap::register_in_registry(&mut registry); #[cfg(feature = "services-dropbox")] - registry.register_builder::(); + Dropbox::register_in_registry(&mut registry); #[cfg(feature = "services-etcd")] - registry.register_builder::(); + Etcd::register_in_registry(&mut registry); #[cfg(feature = "services-foundationdb")] - registry.register_builder::(); + Foundationdb::register_in_registry(&mut registry); #[cfg(feature = "services-fs")] - registry.register_builder::(); + Fs::register_in_registry(&mut registry); #[cfg(feature = "services-ftp")] - registry.register_builder::(); + Ftp::register_in_registry(&mut registry); #[cfg(feature = "services-gcs")] - registry.register_builder::(); + Gcs::register_in_registry(&mut registry); #[cfg(feature = "services-ghac")] - registry.register_builder::(); + Ghac::register_in_registry(&mut registry); #[cfg(feature = "services-hdfs")] - registry.register_builder::(); + Hdfs::register_in_registry(&mut registry); #[cfg(feature = "services-http")] - registry.register_builder::(); + Http::register_in_registry(&mut registry); #[cfg(feature = "services-huggingface")] - registry.register_builder::(); + Huggingface::register_in_registry(&mut registry); #[cfg(feature = "services-ipfs")] - registry.register_builder::(); + Ipfs::register_in_registry(&mut registry); #[cfg(feature = "services-ipmfs")] - registry.register_builder::(); + Ipmfs::register_in_registry(&mut registry); #[cfg(feature = "services-icloud")] - registry.register_builder::(); + Icloud::register_in_registry(&mut registry); #[cfg(feature = "services-libsql")] - registry.register_builder::(); + Libsql::register_in_registry(&mut registry); #[cfg(feature = "services-memcached")] - registry.register_builder::(); + Memcached::register_in_registry(&mut registry); #[cfg(feature = "services-memory")] - registry.register_builder::(); + Memory::register_in_registry(&mut registry); #[cfg(feature = "services-mini-moka")] - registry.register_builder::(); + MiniMoka::register_in_registry(&mut registry); #[cfg(feature = "services-moka")] - registry.register_builder::(); + Moka::register_in_registry(&mut registry); #[cfg(feature = "services-monoiofs")] - registry.register_builder::(); + Monoiofs::register_in_registry(&mut registry); #[cfg(feature = "services-mysql")] - registry.register_builder::(); + Mysql::register_in_registry(&mut registry); #[cfg(feature = "services-obs")] - registry.register_builder::(); + Obs::register_in_registry(&mut registry); #[cfg(feature = "services-onedrive")] - registry.register_builder::(); + Onedrive::register_in_registry(&mut registry); #[cfg(feature = "services-postgresql")] - registry.register_builder::(); + Postgresql::register_in_registry(&mut registry); #[cfg(feature = "services-gdrive")] - registry.register_builder::(); + Gdrive::register_in_registry(&mut registry); #[cfg(feature = "services-oss")] - registry.register_builder::(); + Oss::register_in_registry(&mut registry); #[cfg(feature = "services-persy")] - registry.register_builder::(); + Persy::register_in_registry(&mut registry); #[cfg(feature = "services-redis")] - registry.register_builder::(); + Redis::register_in_registry(&mut registry); #[cfg(feature = "services-rocksdb")] - registry.register_builder::(); + Rocksdb::register_in_registry(&mut registry); #[cfg(feature = "services-s3")] - registry.register_builder::(); + S3::register_in_registry(&mut registry); #[cfg(feature = "services-seafile")] - registry.register_builder::(); + Seafile::register_in_registry(&mut registry); #[cfg(feature = "services-upyun")] - registry.register_builder::(); + Upyun::register_in_registry(&mut registry); #[cfg(feature = "services-yandex-disk")] - registry.register_builder::(); + YandexDisk::register_in_registry(&mut registry); #[cfg(feature = "services-pcloud")] - registry.register_builder::(); + Pcloud::register_in_registry(&mut registry); #[cfg(feature = "services-sftp")] - registry.register_builder::(); + Sftp::register_in_registry(&mut registry); #[cfg(feature = "services-sled")] - registry.register_builder::(); + Sled::register_in_registry(&mut registry); #[cfg(feature = "services-sqlite")] - registry.register_builder::(); + Sqlite::register_in_registry(&mut registry); #[cfg(feature = "services-supabase")] - registry.register_builder::(); + Supabase::register_in_registry(&mut registry); #[cfg(feature = "services-swift")] - registry.register_builder::(); + Swift::register_in_registry(&mut registry); #[cfg(feature = "services-tikv")] - registry.register_builder::(); + Tikv::register_in_registry(&mut registry); #[cfg(feature = "services-vercel-artifacts")] - registry.register_builder::(); + VercelArtifacts::register_in_registry(&mut registry); #[cfg(feature = "services-vercel-blob")] - registry.register_builder::(); + VercelBlob::register_in_registry(&mut registry); #[cfg(feature = "services-webdav")] - registry.register_builder::(); + Webdav::register_in_registry(&mut registry); #[cfg(feature = "services-webhdfs")] - registry.register_builder::(); + Webhdfs::register_in_registry(&mut registry); #[cfg(feature = "services-redb")] - registry.register_builder::(); + Redb::register_in_registry(&mut registry); #[cfg(feature = "services-mongodb")] - registry.register_builder::(); + Mongodb::register_in_registry(&mut registry); #[cfg(feature = "services-hdfs-native")] - registry.register_builder::(); + HdfsNative::register_in_registry(&mut registry); #[cfg(feature = "services-surrealdb")] - registry.register_builder::(); + Surrealdb::register_in_registry(&mut registry); #[cfg(feature = "services-lakefs")] - registry.register_builder::(); + Lakefs::register_in_registry(&mut registry); #[cfg(feature = "services-nebula-graph")] - registry.register_builder::(); + NebulaGraph::register_in_registry(&mut registry); registry } @@ -224,18 +224,4 @@ impl OperatorRegistry { factory(uri, options) } - - fn register_builder(&mut self) { - self.register( - B::SCHEME.into_static(), - operator_factory_from_configurator::(), - ); - } -} - -fn operator_factory_from_configurator() -> OperatorFactory { - |uri, options| { - let builder = C::from_uri(uri, options)?.into_builder(); - Ok(Operator::new(builder)?.finish()) - } } From c5d1c9cae07a3aeff917b49a0ba377e6dc2d6f84 Mon Sep 17 00:00:00 2001 From: Jorge Hermo Date: Sun, 5 Jan 2025 12:51:29 +0100 Subject: [PATCH 21/23] chore: fix comment typo --- core/src/types/operator/builder.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/types/operator/builder.rs b/core/src/types/operator/builder.rs index 012a903f81fe..de42189ed8d6 100644 --- a/core/src/types/operator/builder.rs +++ b/core/src/types/operator/builder.rs @@ -21,7 +21,7 @@ use std::sync::Arc; use crate::layers::*; use crate::raw::*; use crate::*; -// TODO: is this impor path idiomatic to the project? +// TODO: is this import path idiomatic to the project? use super::registry::GLOBAL_OPERATOR_REGISTRY; /// # Operator build API From e53cdd781c742b1a27177c826f14c735ebcdc816 Mon Sep 17 00:00:00 2001 From: Jorge Hermo Date: Sun, 5 Jan 2025 12:51:58 +0100 Subject: [PATCH 22/23] chore: fix comment typo --- core/src/types/operator/registry.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/types/operator/registry.rs b/core/src/types/operator/registry.rs index e14f1e195b6c..28deadf3f1c2 100644 --- a/core/src/types/operator/registry.rs +++ b/core/src/types/operator/registry.rs @@ -33,7 +33,7 @@ thread_local! { // In order to reduce boilerplate, we should return in this function a `Builder` instead of operator?. pub type OperatorFactory = fn(&str, HashMap) -> Result; -// TODO: the default implementation should return an empty registry? Or it shoudl return the initialized +// TODO: the default implementation should return an empty registry? Or it should return the initialized // registry with all the services that are enabled? If so, should we include an `OperatorRegistry::empty` method // that allows users to create an empty registry? #[derive(Clone, Debug, Default)] From 79091acb1a93e68093e63671ed7c962f6f686648 Mon Sep 17 00:00:00 2001 From: Jorge Hermo Date: Sun, 5 Jan 2025 13:13:56 +0100 Subject: [PATCH 23/23] chore: add comment --- core/src/types/operator/builder.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core/src/types/operator/builder.rs b/core/src/types/operator/builder.rs index de42189ed8d6..bc54b61ef2b0 100644 --- a/core/src/types/operator/builder.rs +++ b/core/src/types/operator/builder.rs @@ -100,14 +100,15 @@ impl Operator { /// TODO: document this. /// /// TODO: improve those examples + /// TODO: this test does not work. It always output Ok /// # Examples /// ``` /// # use anyhow::Result; /// use opendal::Operator; /// /// fn test() -> Result<()> { - /// let op = Operator::from_uri("fs://?root=/tmp/test", vec![])?; - /// Ok(()) + /// Operator::from_uri("fs://?root=/tmp/test", vec![])?; + /// Ok(()) /// } /// ``` pub fn from_uri(