-
-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: improve performance and fix panic in async parquet reader (#11607)
- Loading branch information
Showing 7 changed files with 264 additions and 169 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
use once_cell::sync::Lazy; | ||
pub use options::*; | ||
use tokio::sync::RwLock; | ||
|
||
use super::*; | ||
|
||
type CacheKey = (CloudType, Option<CloudOptions>); | ||
|
||
/// A very simple cache that only stores a single object-store. | ||
/// This greatly reduces the query times as multiple object stores (when reading many small files) | ||
/// get rate limited when querying the DNS (can take up to 5s). | ||
#[allow(clippy::type_complexity)] | ||
static OBJECT_STORE_CACHE: Lazy<RwLock<Option<(CacheKey, Arc<dyn ObjectStore>)>>> = | ||
Lazy::new(Default::default); | ||
|
||
type BuildResult = PolarsResult<(CloudLocation, Arc<dyn ObjectStore>)>; | ||
|
||
#[allow(dead_code)] | ||
fn err_missing_feature(feature: &str, scheme: &str) -> BuildResult { | ||
polars_bail!( | ||
ComputeError: | ||
"feature '{}' must be enabled in order to use '{}' cloud urls", feature, scheme, | ||
); | ||
} | ||
/// Bails with a `ComputeError` stating that a configuration must be provided before cloud
/// urls with the given `scheme` can be used.
///
/// `feature` is interpolated into the message as the name of the missing configuration
/// (the callers in this file pass the cloud name, e.g. `"gcp"` or `"azure"`).
///
/// # Errors
///
/// Always returns an error; the `Ok` variant of [`BuildResult`] is never produced.
// NOTE(review): the gate includes `aws`, but the AWS path in this file never calls this
// function — confirm whether an aws-only build reports it as dead code.
#[cfg(any(feature = "azure", feature = "aws", feature = "gcp"))]
fn err_missing_configuration(feature: &str, scheme: &str) -> BuildResult {
    polars_bail!(
        ComputeError:
        "configuration '{}' must be provided in order to use '{}' cloud urls", feature, scheme,
    );
}
|
||
/// Build an [`ObjectStore`] based on the URL and passed in url. Return the cloud location and an implementation of the object store. | ||
pub async fn build_object_store(url: &str, options: Option<&CloudOptions>) -> BuildResult { | ||
let cloud_location = CloudLocation::new(url)?; | ||
|
||
let cloud_type = CloudType::from_str(url)?; | ||
let options = options.cloned(); | ||
let key = (cloud_type, options); | ||
|
||
{ | ||
let cache = OBJECT_STORE_CACHE.read().await; | ||
if let Some((stored_key, store)) = cache.as_ref() { | ||
if stored_key == &key { | ||
return Ok((cloud_location, store.clone())); | ||
} | ||
} | ||
} | ||
|
||
let store = match key.0 { | ||
CloudType::File => { | ||
let local = LocalFileSystem::new(); | ||
Ok::<_, PolarsError>(Arc::new(local) as Arc<dyn ObjectStore>) | ||
}, | ||
CloudType::Aws => { | ||
#[cfg(feature = "aws")] | ||
{ | ||
let options = key | ||
.1 | ||
.as_ref() | ||
.map(Cow::Borrowed) | ||
.unwrap_or_else(|| Cow::Owned(Default::default())); | ||
let store = options.build_aws(url).await?; | ||
Ok::<_, PolarsError>(Arc::new(store) as Arc<dyn ObjectStore>) | ||
} | ||
#[cfg(not(feature = "aws"))] | ||
return err_missing_feature("aws", &cloud_location.scheme); | ||
}, | ||
CloudType::Gcp => { | ||
#[cfg(feature = "gcp")] | ||
match key.1.as_ref() { | ||
Some(options) => { | ||
let store = options.build_gcp(url)?; | ||
Ok::<_, PolarsError>(Arc::new(store) as Arc<dyn ObjectStore>) | ||
}, | ||
_ => return err_missing_configuration("gcp", &cloud_location.scheme), | ||
} | ||
#[cfg(not(feature = "gcp"))] | ||
return err_missing_feature("gcp", &cloud_location.scheme); | ||
}, | ||
CloudType::Azure => { | ||
{ | ||
#[cfg(feature = "azure")] | ||
match key.1.as_ref() { | ||
Some(options) => { | ||
let store = options.build_azure(url)?; | ||
Ok::<_, PolarsError>(Arc::new(store) as Arc<dyn ObjectStore>) | ||
}, | ||
_ => return err_missing_configuration("azure", &cloud_location.scheme), | ||
} | ||
} | ||
#[cfg(not(feature = "azure"))] | ||
return err_missing_feature("azure", &cloud_location.scheme); | ||
}, | ||
}?; | ||
let mut cache = OBJECT_STORE_CACHE.write().await; | ||
*cache = Some((key, store.clone())); | ||
Ok((cloud_location, store)) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.