From ffa3dd9c92ebd7cd63400972b8b093ded8988d36 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Thu, 30 Jan 2025 23:51:42 -0500 Subject: [PATCH] Always check env when constructing stores, remove `from_env` (#189) --- obstore/python/obstore/store/_aws.pyi | 74 ++++++++----------------- obstore/python/obstore/store/_azure.pyi | 54 ++++++------------ obstore/python/obstore/store/_gcs.pyi | 51 ++++++----------- pyo3-object_store/src/aws.rs | 31 ++--------- pyo3-object_store/src/azure.rs | 31 ++--------- pyo3-object_store/src/gcp.rs | 31 ++--------- pyo3-object_store/src/local.rs | 2 +- 7 files changed, 74 insertions(+), 200 deletions(-) diff --git a/obstore/python/obstore/store/_aws.pyi b/obstore/python/obstore/store/_aws.pyi index 1c5d2bec..d77e6b86 100644 --- a/obstore/python/obstore/store/_aws.pyi +++ b/obstore/python/obstore/store/_aws.pyi @@ -487,71 +487,46 @@ class S3Config(TypedDict, total=False): """If virtual hosted style request has to be used.""" class S3Store: - """ - Configure a connection to Amazon S3 using the specified credentials in the specified - Amazon region and bucket. + """Interface to an Amazon S3 bucket. + + All constructors will check for environment variables. All environment variables + starting with `AWS_` will be evaluated. Names must match keys from + [`S3Config`][obstore.store.S3Config]. Only upper-case environment variables are + accepted. + + Some examples of variables extracted from environment: + + - `AWS_ACCESS_KEY_ID` -> access_key_id + - `AWS_SECRET_ACCESS_KEY` -> secret_access_key + - `AWS_DEFAULT_REGION` -> region + - `AWS_ENDPOINT` -> endpoint + - `AWS_SESSION_TOKEN` -> token + - `AWS_CONTAINER_CREDENTIALS_RELATIVE_URI` -> + - `AWS_REQUEST_PAYER` -> set to "true" to permit requester-pays connections. **Examples**: **Using requester-pays buckets**: - Pass `request_payer=True` as a keyword argument. Or, if you're using - `S3Store.from_env`, have `AWS_REQUESTER_PAYS=True` set in the environment. + Pass `request_payer=True` as a keyword argument or have `AWS_REQUESTER_PAYS=True` + set in the environment. **Anonymous requests**: - Pass `skip_signature=True` as a keyword argument. Or, if you're using - `S3Store.from_env`, have `AWS_SKIP_SIGNATURE=True` set in the environment. + Pass `skip_signature=True` as a keyword argument or have `AWS_SKIP_SIGNATURE=True` + set in the environment. """ def __init__( self, - bucket: str, - *, - config: S3Config | None = None, - client_options: ClientConfig | None = None, - retry_config: RetryConfig | None = None, - **kwargs: Unpack[S3Config], - ) -> None: - """Create a new S3Store - - Args: - bucket: The AWS bucket to use. - - Keyword Args: - config: AWS Configuration. Values in this config will override values inferred from the environment. Defaults to None. - client_options: HTTP Client options. Defaults to None. - retry_config: Retry configuration. Defaults to None. - - Returns: - S3Store - """ - - @classmethod - def from_env( - cls, bucket: str | None = None, *, config: S3Config | None = None, client_options: ClientConfig | None = None, retry_config: RetryConfig | None = None, **kwargs: Unpack[S3Config], - ) -> S3Store: - """Construct a new S3Store with regular AWS environment variables - - All environment variables starting with `AWS_` will be evaluated. Names must - match items from `S3ConfigKey`. Only upper-case environment variables are - accepted. - - Some examples of variables extracted from environment: - - - `AWS_ACCESS_KEY_ID` -> access_key_id - - `AWS_SECRET_ACCESS_KEY` -> secret_access_key - - `AWS_DEFAULT_REGION` -> region - - `AWS_ENDPOINT` -> endpoint - - `AWS_SESSION_TOKEN` -> token - - `AWS_CONTAINER_CREDENTIALS_RELATIVE_URI` -> - - `AWS_REQUEST_PAYER` -> set to "true" to permit requester-pays connections. + ) -> None: + """Create a new S3Store. Args: bucket: The AWS bucket to use. @@ -576,7 +551,7 @@ class S3Store: retry_config: RetryConfig | None = None, **kwargs: Unpack[S3Config], ) -> S3Store: - """Construct a new S3Store with credentials inferred from a boto3 Session + """Construct a new S3Store with credentials inferred from a boto3 Session. This can be useful to read S3 credentials from [disk-based credentials sources](https://docs.aws.amazon.com/cli/v1/userguide/cli-configure-files.html). @@ -616,8 +591,7 @@ class S3Store: retry_config: RetryConfig | None = None, **kwargs: Unpack[S3Config], ) -> S3Store: - """ - Parse available connection info from a well-known storage URL. + """Parse available connection info from a well-known storage URL. The supported url schemes are: diff --git a/obstore/python/obstore/store/_azure.pyi b/obstore/python/obstore/store/_azure.pyi index 4ccec1fa..b9e23de0 100644 --- a/obstore/python/obstore/store/_azure.pyi +++ b/obstore/python/obstore/store/_azure.pyi @@ -260,11 +260,26 @@ class AzureConfig(TypedDict, total=False): """Use object store with url scheme account.dfs.fabric.microsoft.com""" class AzureStore: - """Configure a connection to Microsoft Azure Blob Storage container using the specified credentials.""" + """Interface to a Microsoft Azure Blob Storage container. + + All constructors will check for environment variables. All environment variables + starting with `AZURE_` will be evaluated. Names must match keys from + [`AzureConfig`][obstore.store.AzureConfig]. Only upper-case environment variables + are accepted. + + Some examples of variables extracted from environment: + + - `AZURE_STORAGE_ACCOUNT_NAME`: storage account name + - `AZURE_STORAGE_ACCOUNT_KEY`: storage account master key + - `AZURE_STORAGE_ACCESS_KEY`: alias for `AZURE_STORAGE_ACCOUNT_KEY` + - `AZURE_STORAGE_CLIENT_ID` -> client id for service principal authorization + - `AZURE_STORAGE_CLIENT_SECRET` -> client secret for service principal authorization + - `AZURE_STORAGE_TENANT_ID` -> tenant id used in oauth flows + """ def __init__( self, - container: str, + container: str | None = None, *, config: AzureConfig | None = None, client_options: ClientConfig | None = None, @@ -274,40 +289,7 @@ class AzureStore: """Construct a new AzureStore. Args: - container: _description_ - - Keyword Args: - config: Azure Configuration. Values in this config will override values inferred from the url. Defaults to None. - client_options: HTTP Client options. Defaults to None. - retry_config: Retry configuration. Defaults to None. - - Returns: - AzureStore - """ - - @classmethod - def from_env( - cls, - container: str, - *, - config: AzureConfig | None = None, - client_options: ClientConfig | None = None, - retry_config: RetryConfig | None = None, - **kwargs: Unpack[AzureConfig], - ) -> AzureStore: - """Construct a new AzureStore with values pre-populated from environment variables. - - Variables extracted from environment: - - - `AZURE_STORAGE_ACCOUNT_NAME`: storage account name - - `AZURE_STORAGE_ACCOUNT_KEY`: storage account master key - - `AZURE_STORAGE_ACCESS_KEY`: alias for `AZURE_STORAGE_ACCOUNT_KEY` - - `AZURE_STORAGE_CLIENT_ID` -> client id for service principal authorization - - `AZURE_STORAGE_CLIENT_SECRET` -> client secret for service principal authorization - - `AZURE_STORAGE_TENANT_ID` -> tenant id used in oauth flows - - Args: - container: _description_ + container: the name of the container. Keyword Args: config: Azure Configuration. Values in this config will override values inferred from the url. Defaults to None. diff --git a/obstore/python/obstore/store/_gcs.pyi b/obstore/python/obstore/store/_gcs.pyi index f2017bdc..08a9d825 100644 --- a/obstore/python/obstore/store/_gcs.pyi +++ b/obstore/python/obstore/store/_gcs.pyi @@ -60,7 +60,21 @@ class GCSConfig(TypedDict, total=False): """Path to the service account file.""" class GCSStore: - """Configure a connection to Google Cloud Storage. + """Interface to Google Cloud Storage. + + All constructors will check for environment variables. All environment variables + starting with `GOOGLE_` will be evaluated. Names must match keys from + [`GCSConfig`][obstore.store.GCSConfig]. Only upper-case environment variables are + accepted. + + Some examples of variables extracted from environment: + + - `GOOGLE_SERVICE_ACCOUNT`: location of service account file + - `GOOGLE_SERVICE_ACCOUNT_PATH`: (alias) location of service account file + - `SERVICE_ACCOUNT`: (alias) location of service account file + - `GOOGLE_SERVICE_ACCOUNT_KEY`: JSON serialized service account key + - `GOOGLE_BUCKET`: bucket name + - `GOOGLE_BUCKET_NAME`: (alias) bucket name If no credentials are explicitly provided, they will be sourced from the environment as documented @@ -69,7 +83,7 @@ class GCSStore: def __init__( self, - bucket: str, + bucket: str | None = None, *, config: GCSConfig | None = None, client_options: ClientConfig | None = None, @@ -90,39 +104,6 @@ class GCSStore: GCSStore """ - @classmethod - def from_env( - cls, - bucket: str, - *, - config: GCSConfig | None = None, - client_options: ClientConfig | None = None, - retry_config: RetryConfig | None = None, - **kwargs: Unpack[GCSConfig], - ) -> GCSStore: - """Construct a new GCSStore with values pre-populated from environment variables. - - Variables extracted from environment: - - - `GOOGLE_SERVICE_ACCOUNT`: location of service account file - - `GOOGLE_SERVICE_ACCOUNT_PATH`: (alias) location of service account file - - `SERVICE_ACCOUNT`: (alias) location of service account file - - `GOOGLE_SERVICE_ACCOUNT_KEY`: JSON serialized service account key - - `GOOGLE_BUCKET`: bucket name - - `GOOGLE_BUCKET_NAME`: (alias) bucket name - - Args: - bucket: The GCS bucket to use. - - Keyword Args: - config: GCS Configuration. Values in this config will override values inferred from the environment. Defaults to None. - client_options: HTTP Client options. Defaults to None. - retry_config: Retry configuration. Defaults to None. - - Returns: - GCSStore - """ - @classmethod def from_url( cls, diff --git a/pyo3-object_store/src/aws.rs b/pyo3-object_store/src/aws.rs index bc786c07..0cb6bea9 100644 --- a/pyo3-object_store/src/aws.rs +++ b/pyo3-object_store/src/aws.rs @@ -34,32 +34,8 @@ impl PyS3Store { impl PyS3Store { // Create from parameters #[new] - #[pyo3(signature = (bucket, *, config=None, client_options=None, retry_config=None, **kwargs))] - fn new( - bucket: String, - config: Option, - client_options: Option, - retry_config: Option, - kwargs: Option, - ) -> PyObjectStoreResult { - let mut builder = AmazonS3Builder::new().with_bucket_name(bucket); - if let Some(config_kwargs) = combine_config_kwargs(config, kwargs)? { - builder = config_kwargs.apply_config(builder); - } - if let Some(client_options) = client_options { - builder = builder.with_client_options(client_options.into()) - } - if let Some(retry_config) = retry_config { - builder = builder.with_retry(retry_config.into()) - } - Ok(Self(Arc::new(builder.build()?))) - } - - // Create from env variables - #[classmethod] #[pyo3(signature = (bucket=None, *, config=None, client_options=None, retry_config=None, **kwargs))] - fn from_env( - _cls: &Bound, + fn new( bucket: Option, config: Option, client_options: Option, @@ -116,7 +92,10 @@ impl PyS3Store { .getattr(intern!(py, "token"))? .extract::>()?; - let mut builder = AmazonS3Builder::new().with_bucket_name(bucket); + // We read env variables because even though boto3.Session reads env variables itself, + // there may be more variables set than just authentication. Regardless, any variables set + // by the environment will be overwritten below if they exist/were passed in. + let mut builder = AmazonS3Builder::from_env().with_bucket_name(bucket); if let Some(region) = region { builder = builder.with_region(region); } diff --git a/pyo3-object_store/src/azure.rs b/pyo3-object_store/src/azure.rs index 4c83f580..e969fcdc 100644 --- a/pyo3-object_store/src/azure.rs +++ b/pyo3-object_store/src/azure.rs @@ -33,39 +33,18 @@ impl PyAzureStore { impl PyAzureStore { // Create from parameters #[new] - #[pyo3(signature = (container, *, config=None, client_options=None, retry_config=None, **kwargs))] + #[pyo3(signature = (container=None, *, config=None, client_options=None, retry_config=None, **kwargs))] fn new( - container: String, + container: Option, config: Option, client_options: Option, retry_config: Option, kwargs: Option, ) -> PyObjectStoreResult { - let mut builder = MicrosoftAzureBuilder::new().with_container_name(container); - if let Some(config_kwargs) = combine_config_kwargs(config, kwargs)? { - builder = config_kwargs.apply_config(builder); - } - if let Some(client_options) = client_options { - builder = builder.with_client_options(client_options.into()) + let mut builder = MicrosoftAzureBuilder::from_env(); + if let Some(container) = container { + builder = builder.with_container_name(container); } - if let Some(retry_config) = retry_config { - builder = builder.with_retry(retry_config.into()) - } - Ok(Self(Arc::new(builder.build()?))) - } - - // Create from env variables - #[classmethod] - #[pyo3(signature = (container, *, config=None, client_options=None, retry_config=None, **kwargs))] - fn from_env( - _cls: &Bound, - container: String, - config: Option, - client_options: Option, - retry_config: Option, - kwargs: Option, - ) -> PyObjectStoreResult { - let mut builder = MicrosoftAzureBuilder::from_env().with_container_name(container); if let Some(config_kwargs) = combine_config_kwargs(config, kwargs)? { builder = config_kwargs.apply_config(builder); } diff --git a/pyo3-object_store/src/gcp.rs b/pyo3-object_store/src/gcp.rs index c19adbd5..5abd272f 100644 --- a/pyo3-object_store/src/gcp.rs +++ b/pyo3-object_store/src/gcp.rs @@ -33,39 +33,18 @@ impl PyGCSStore { impl PyGCSStore { // Create from parameters #[new] - #[pyo3(signature = (bucket, *, config=None, client_options=None, retry_config=None, **kwargs))] + #[pyo3(signature = (bucket=None, *, config=None, client_options=None, retry_config=None, **kwargs))] fn new( - bucket: String, + bucket: Option, config: Option, client_options: Option, retry_config: Option, kwargs: Option, ) -> PyObjectStoreResult { - let mut builder = GoogleCloudStorageBuilder::new().with_bucket_name(bucket); - if let Some(config_kwargs) = combine_config_kwargs(config, kwargs)? { - builder = config_kwargs.apply_config(builder); - } - if let Some(client_options) = client_options { - builder = builder.with_client_options(client_options.into()) + let mut builder = GoogleCloudStorageBuilder::from_env(); + if let Some(bucket) = bucket { + builder = builder.with_bucket_name(bucket); } - if let Some(retry_config) = retry_config { - builder = builder.with_retry(retry_config.into()) - } - Ok(Self(Arc::new(builder.build()?))) - } - - // Create from env variables - #[classmethod] - #[pyo3(signature = (bucket, *, config=None, client_options=None, retry_config=None, **kwargs))] - fn from_env( - _cls: &Bound, - bucket: String, - config: Option, - client_options: Option, - retry_config: Option, - kwargs: Option, - ) -> PyObjectStoreResult { - let mut builder = GoogleCloudStorageBuilder::from_env().with_bucket_name(bucket); if let Some(config_kwargs) = combine_config_kwargs(config, kwargs)? { builder = config_kwargs.apply_config(builder); } diff --git a/pyo3-object_store/src/local.rs b/pyo3-object_store/src/local.rs index 9fafa63e..b835b268 100644 --- a/pyo3-object_store/src/local.rs +++ b/pyo3-object_store/src/local.rs @@ -30,7 +30,7 @@ impl PyLocalStore { #[pymethods] impl PyLocalStore { #[new] - #[pyo3(signature = (prefix = None, *, automatic_cleanup=false, mkdir=false))] + #[pyo3(signature = (prefix=None, *, automatic_cleanup=false, mkdir=false))] fn py_new( prefix: Option, automatic_cleanup: bool,