Skip to content

Commit e1e1c79

Browse files
authored
Create ObjectStore from URL and Options (#4047) (#4200)
* Add parse_url function (#4047) * Clippy * Fix copypasta * Fix wasm32 build * More wasm fixes * Return remaining path * Don't use from_env
1 parent f875eec commit e1e1c79

File tree

3 files changed

+269
-1
lines changed

3 files changed

+269
-1
lines changed

object_store/src/aws/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -722,7 +722,7 @@ impl AmazonS3Builder {
722722
///
723723
/// - `s3://<bucket>/<path>`
724724
/// - `s3a://<bucket>/<path>`
725-
/// - `https://s3.<bucket>.amazonaws.com`
725+
/// - `https://s3.<region>.amazonaws.com/<bucket>`
726726
/// - `https://<bucket>.s3.<region>.amazonaws.com`
727727
/// - `https://ACCOUNT_ID.r2.cloudflarestorage.com/bucket`
728728
///

object_store/src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,8 +252,11 @@ mod config;
252252

253253
#[cfg(any(feature = "azure", feature = "aws", feature = "gcp"))]
254254
mod multipart;
255+
mod parse;
255256
mod util;
256257

258+
pub use parse::{parse_url, parse_url_opts};
259+
257260
use crate::path::Path;
258261
#[cfg(not(target_arch = "wasm32"))]
259262
use crate::util::maybe_spawn_blocking;

object_store/src/parse.rs

Lines changed: 265 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,265 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#[cfg(not(target_arch = "wasm32"))]
19+
use crate::local::LocalFileSystem;
20+
use crate::memory::InMemory;
21+
use crate::path::Path;
22+
use crate::ObjectStore;
23+
use snafu::Snafu;
24+
use url::Url;
25+
26+
#[derive(Debug, Snafu)]
27+
enum Error {
28+
#[snafu(display("Unable to convert URL \"{}\" to filesystem path", url))]
29+
InvalidUrl { url: Url },
30+
31+
#[snafu(display("Unable to recognise URL \"{}\"", url))]
32+
Unrecognised { url: Url },
33+
34+
#[snafu(display("Feature {scheme:?} not enabled"))]
35+
NotEnabled { scheme: ObjectStoreScheme },
36+
37+
#[snafu(context(false))]
38+
Path { source: crate::path::Error },
39+
}
40+
41+
impl From<Error> for super::Error {
42+
fn from(e: Error) -> Self {
43+
Self::Generic {
44+
store: "URL",
45+
source: Box::new(e),
46+
}
47+
}
48+
}
49+
50+
/// Recognises various URL formats, identifying the relevant [`ObjectStore`](crate::ObjectStore)
51+
#[derive(Debug, Eq, PartialEq)]
52+
enum ObjectStoreScheme {
53+
/// Url corresponding to [`LocalFileSystem`](crate::local::LocalFileSystem)
54+
Local,
55+
/// Url corresponding to [`InMemory`](crate::memory::InMemory)
56+
Memory,
57+
/// Url corresponding to [`AmazonS3`](crate::aws::AmazonS3)
58+
AmazonS3,
59+
/// Url corresponding to [`GoogleCloudStorage`](crate::gcp::GoogleCloudStorage)
60+
GoogleCloudStorage,
61+
/// Url corresponding to [`MicrosoftAzure`](crate::azure::MicrosoftAzure)
62+
MicrosoftAzure,
63+
/// Url corresponding to [`HttpStore`](crate::http::HttpStore)
64+
Http,
65+
}
66+
67+
impl ObjectStoreScheme {
68+
/// Create an [`ObjectStoreScheme`] from the provided [`Url`]
69+
///
70+
/// Returns the [`ObjectStoreScheme`] and the remaining [`Path`]
71+
fn parse(url: &Url) -> Result<(Self, Path), Error> {
72+
let strip_bucket = || Some(url.path().strip_prefix('/')?.split_once('/')?.1);
73+
74+
let (scheme, path) = match (url.scheme(), url.host_str()) {
75+
("file", None) => (Self::Local, url.path()),
76+
("memory", None) => (Self::Memory, url.path()),
77+
("s3" | "s3a", Some(_)) => (Self::AmazonS3, url.path()),
78+
("gs", Some(_)) => (Self::GoogleCloudStorage, url.path()),
79+
("az" | "adl" | "azure" | "abfs" | "abfss", Some(_)) => {
80+
(Self::MicrosoftAzure, url.path())
81+
}
82+
("http", Some(_)) => (Self::Http, url.path()),
83+
("https", Some(host)) => {
84+
if host.ends_with("dfs.core.windows.net")
85+
|| host.ends_with("blob.core.windows.net")
86+
{
87+
(Self::MicrosoftAzure, url.path())
88+
} else if host.ends_with("amazonaws.com") {
89+
match host.starts_with("s3") {
90+
true => (Self::AmazonS3, strip_bucket().unwrap_or_default()),
91+
false => (Self::AmazonS3, url.path()),
92+
}
93+
} else if host.ends_with("r2.cloudflarestorage.com") {
94+
(Self::AmazonS3, strip_bucket().unwrap_or_default())
95+
} else {
96+
(Self::Http, url.path())
97+
}
98+
}
99+
_ => return Err(Error::Unrecognised { url: url.clone() }),
100+
};
101+
102+
let path = Path::parse(path)?;
103+
Ok((scheme, path))
104+
}
105+
}
106+
107+
#[cfg(any(feature = "aws", feature = "gcp", feature = "azure", feature = "http"))]
108+
macro_rules! builder_opts {
109+
($builder:ty, $url:expr, $options:expr) => {{
110+
let builder = $options.into_iter().fold(
111+
<$builder>::new().with_url($url.as_str()),
112+
|builder, (key, value)| match key.as_ref().parse() {
113+
Ok(k) => builder.with_config(k, value),
114+
Err(_) => builder,
115+
},
116+
);
117+
Box::new(builder.build()?) as _
118+
}};
119+
}
120+
121+
/// Create an [`ObjectStore`] based on the provided `url`
122+
///
123+
/// Returns
124+
/// - An [`ObjectStore`] of the corresponding type
125+
/// - The [`Path`] into the [`ObjectStore`] of the addressed resource
126+
pub fn parse_url(url: &Url) -> Result<(Box<dyn ObjectStore>, Path), super::Error> {
127+
parse_url_opts(url, std::iter::empty::<(&str, &str)>())
128+
}
129+
130+
/// Create an [`ObjectStore`] based on the provided `url` and options
131+
///
132+
/// Returns
133+
/// - An [`ObjectStore`] of the corresponding type
134+
/// - The [`Path`] into the [`ObjectStore`] of the addressed resource
135+
pub fn parse_url_opts<I, K, V>(
136+
url: &Url,
137+
options: I,
138+
) -> Result<(Box<dyn ObjectStore>, Path), super::Error>
139+
where
140+
I: IntoIterator<Item = (K, V)>,
141+
K: AsRef<str>,
142+
V: Into<String>,
143+
{
144+
let _options = options;
145+
let (scheme, path) = ObjectStoreScheme::parse(url)?;
146+
let path = Path::parse(path)?;
147+
148+
let store = match scheme {
149+
#[cfg(not(target_arch = "wasm32"))]
150+
ObjectStoreScheme::Local => Box::new(LocalFileSystem::new()) as _,
151+
ObjectStoreScheme::Memory => Box::new(InMemory::new()) as _,
152+
#[cfg(feature = "aws")]
153+
ObjectStoreScheme::AmazonS3 => {
154+
builder_opts!(crate::aws::AmazonS3Builder, url, _options)
155+
}
156+
#[cfg(feature = "gcp")]
157+
ObjectStoreScheme::GoogleCloudStorage => {
158+
builder_opts!(crate::gcp::GoogleCloudStorageBuilder, url, _options)
159+
}
160+
#[cfg(feature = "azure")]
161+
ObjectStoreScheme::MicrosoftAzure => {
162+
builder_opts!(crate::azure::MicrosoftAzureBuilder, url, _options)
163+
}
164+
#[cfg(feature = "http")]
165+
ObjectStoreScheme::Http => {
166+
let url = &url[..url::Position::BeforePath];
167+
Box::new(crate::http::HttpBuilder::new().with_url(url).build()?) as _
168+
}
169+
#[cfg(not(all(
170+
feature = "aws",
171+
feature = "azure",
172+
feature = "gcp",
173+
feature = "http"
174+
)))]
175+
s => {
176+
return Err(super::Error::Generic {
177+
store: "parse_url",
178+
source: format!("feature for {s:?} not enabled").into(),
179+
})
180+
}
181+
};
182+
183+
Ok((store, path))
184+
}
185+
186+
#[cfg(test)]
187+
mod tests {
188+
use super::*;
189+
190+
#[test]
191+
fn test_parse() {
192+
let cases = [
193+
("file:/path", (ObjectStoreScheme::Local, "path")),
194+
("file:///path", (ObjectStoreScheme::Local, "path")),
195+
("memory:/path", (ObjectStoreScheme::Memory, "path")),
196+
("memory:///", (ObjectStoreScheme::Memory, "")),
197+
("s3://bucket/path", (ObjectStoreScheme::AmazonS3, "path")),
198+
("s3a://bucket/path", (ObjectStoreScheme::AmazonS3, "path")),
199+
(
200+
"https://s3.region.amazonaws.com/bucket",
201+
(ObjectStoreScheme::AmazonS3, ""),
202+
),
203+
(
204+
"https://s3.region.amazonaws.com/bucket/path",
205+
(ObjectStoreScheme::AmazonS3, "path"),
206+
),
207+
(
208+
"https://bucket.s3.region.amazonaws.com",
209+
(ObjectStoreScheme::AmazonS3, ""),
210+
),
211+
(
212+
"https://ACCOUNT_ID.r2.cloudflarestorage.com/bucket",
213+
(ObjectStoreScheme::AmazonS3, ""),
214+
),
215+
(
216+
"https://ACCOUNT_ID.r2.cloudflarestorage.com/bucket/path",
217+
(ObjectStoreScheme::AmazonS3, "path"),
218+
),
219+
(
220+
"abfs://container/path",
221+
(ObjectStoreScheme::MicrosoftAzure, "path"),
222+
),
223+
(
224+
"abfs://file_system@account_name.dfs.core.windows.net/path",
225+
(ObjectStoreScheme::MicrosoftAzure, "path"),
226+
),
227+
(
228+
"abfss://file_system@account_name.dfs.core.windows.net/path",
229+
(ObjectStoreScheme::MicrosoftAzure, "path"),
230+
),
231+
(
232+
"https://account.dfs.core.windows.net",
233+
(ObjectStoreScheme::MicrosoftAzure, ""),
234+
),
235+
(
236+
"https://account.blob.core.windows.net",
237+
(ObjectStoreScheme::MicrosoftAzure, ""),
238+
),
239+
(
240+
"gs://bucket/path",
241+
(ObjectStoreScheme::GoogleCloudStorage, "path"),
242+
),
243+
("http://mydomain/path", (ObjectStoreScheme::Http, "path")),
244+
("https://mydomain/path", (ObjectStoreScheme::Http, "path")),
245+
];
246+
247+
for (s, (expected_scheme, expected_path)) in cases {
248+
let url = Url::parse(s).unwrap();
249+
let (scheme, path) = ObjectStoreScheme::parse(&url).unwrap();
250+
251+
assert_eq!(scheme, expected_scheme, "{s}");
252+
assert_eq!(path, Path::parse(expected_path).unwrap(), "{s}");
253+
}
254+
255+
let neg_cases = [
256+
"unix:/run/foo.socket",
257+
"file://remote/path",
258+
"memory://remote/",
259+
];
260+
for s in neg_cases {
261+
let url = Url::parse(s).unwrap();
262+
assert!(ObjectStoreScheme::parse(&url).is_err());
263+
}
264+
}
265+
}

0 commit comments

Comments
 (0)