Skip to content

Commit

Permalink
Always check env when constructing stores, remove from_env (#189)
Browse files Browse the repository at this point in the history
  • Loading branch information
kylebarron authored Jan 31, 2025
1 parent 5a55ec7 commit ffa3dd9
Show file tree
Hide file tree
Showing 7 changed files with 74 additions and 200 deletions.
74 changes: 24 additions & 50 deletions obstore/python/obstore/store/_aws.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -487,71 +487,46 @@ class S3Config(TypedDict, total=False):
"""If virtual hosted style request has to be used."""

class S3Store:
"""
Configure a connection to Amazon S3 using the specified credentials in the specified
Amazon region and bucket.
"""Interface to an Amazon S3 bucket.
All constructors will check for environment variables. All environment variables
starting with `AWS_` will be evaluated. Names must match keys from
[`S3Config`][obstore.store.S3Config]. Only upper-case environment variables are
accepted.
Some examples of variables extracted from environment:
- `AWS_ACCESS_KEY_ID` -> access_key_id
- `AWS_SECRET_ACCESS_KEY` -> secret_access_key
- `AWS_DEFAULT_REGION` -> region
- `AWS_ENDPOINT` -> endpoint
- `AWS_SESSION_TOKEN` -> token
- `AWS_CONTAINER_CREDENTIALS_RELATIVE_URI` -> <https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-iam-roles.html>
- `AWS_REQUEST_PAYER` -> set to "true" to permit requester-pays connections.
**Examples**:
**Using requester-pays buckets**:
Pass `request_payer=True` as a keyword argument. Or, if you're using
`S3Store.from_env`, have `AWS_REQUESTER_PAYS=True` set in the environment.
Pass `request_payer=True` as a keyword argument or have `AWS_REQUESTER_PAYS=True`
set in the environment.
**Anonymous requests**:
Pass `skip_signature=True` as a keyword argument. Or, if you're using
`S3Store.from_env`, have `AWS_SKIP_SIGNATURE=True` set in the environment.
Pass `skip_signature=True` as a keyword argument or have `AWS_SKIP_SIGNATURE=True`
set in the environment.
"""

def __init__(
self,
bucket: str,
*,
config: S3Config | None = None,
client_options: ClientConfig | None = None,
retry_config: RetryConfig | None = None,
**kwargs: Unpack[S3Config],
) -> None:
"""Create a new S3Store
Args:
bucket: The AWS bucket to use.
Keyword Args:
config: AWS Configuration. Values in this config will override values inferred from the environment. Defaults to None.
client_options: HTTP Client options. Defaults to None.
retry_config: Retry configuration. Defaults to None.
Returns:
S3Store
"""

@classmethod
def from_env(
cls,
bucket: str | None = None,
*,
config: S3Config | None = None,
client_options: ClientConfig | None = None,
retry_config: RetryConfig | None = None,
**kwargs: Unpack[S3Config],
) -> S3Store:
"""Construct a new S3Store with regular AWS environment variables
All environment variables starting with `AWS_` will be evaluated. Names must
match items from `S3ConfigKey`. Only upper-case environment variables are
accepted.
Some examples of variables extracted from environment:
- `AWS_ACCESS_KEY_ID` -> access_key_id
- `AWS_SECRET_ACCESS_KEY` -> secret_access_key
- `AWS_DEFAULT_REGION` -> region
- `AWS_ENDPOINT` -> endpoint
- `AWS_SESSION_TOKEN` -> token
- `AWS_CONTAINER_CREDENTIALS_RELATIVE_URI` -> <https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-iam-roles.html>
- `AWS_REQUEST_PAYER` -> set to "true" to permit requester-pays connections.
) -> None:
"""Create a new S3Store.
Args:
bucket: The AWS bucket to use.
Expand All @@ -576,7 +551,7 @@ class S3Store:
retry_config: RetryConfig | None = None,
**kwargs: Unpack[S3Config],
) -> S3Store:
"""Construct a new S3Store with credentials inferred from a boto3 Session
"""Construct a new S3Store with credentials inferred from a boto3 Session.
This can be useful to read S3 credentials from [disk-based credentials sources](https://docs.aws.amazon.com/cli/v1/userguide/cli-configure-files.html).
Expand Down Expand Up @@ -616,8 +591,7 @@ class S3Store:
retry_config: RetryConfig | None = None,
**kwargs: Unpack[S3Config],
) -> S3Store:
"""
Parse available connection info from a well-known storage URL.
"""Parse available connection info from a well-known storage URL.
The supported url schemes are:
Expand Down
54 changes: 18 additions & 36 deletions obstore/python/obstore/store/_azure.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -260,11 +260,26 @@ class AzureConfig(TypedDict, total=False):
"""Use object store with url scheme account.dfs.fabric.microsoft.com"""

class AzureStore:
"""Configure a connection to Microsoft Azure Blob Storage container using the specified credentials."""
"""Interface to a Microsoft Azure Blob Storage container.
All constructors will check for environment variables. All environment variables
starting with `AZURE_` will be evaluated. Names must match keys from
[`AzureConfig`][obstore.store.AzureConfig]. Only upper-case environment variables
are accepted.
Some examples of variables extracted from environment:
- `AZURE_STORAGE_ACCOUNT_NAME`: storage account name
- `AZURE_STORAGE_ACCOUNT_KEY`: storage account master key
- `AZURE_STORAGE_ACCESS_KEY`: alias for `AZURE_STORAGE_ACCOUNT_KEY`
- `AZURE_STORAGE_CLIENT_ID` -> client id for service principal authorization
- `AZURE_STORAGE_CLIENT_SECRET` -> client secret for service principal authorization
- `AZURE_STORAGE_TENANT_ID` -> tenant id used in oauth flows
"""

def __init__(
self,
container: str,
container: str | None = None,
*,
config: AzureConfig | None = None,
client_options: ClientConfig | None = None,
Expand All @@ -274,40 +289,7 @@ class AzureStore:
"""Construct a new AzureStore.
Args:
container: _description_
Keyword Args:
config: Azure Configuration. Values in this config will override values inferred from the url. Defaults to None.
client_options: HTTP Client options. Defaults to None.
retry_config: Retry configuration. Defaults to None.
Returns:
AzureStore
"""

@classmethod
def from_env(
cls,
container: str,
*,
config: AzureConfig | None = None,
client_options: ClientConfig | None = None,
retry_config: RetryConfig | None = None,
**kwargs: Unpack[AzureConfig],
) -> AzureStore:
"""Construct a new AzureStore with values pre-populated from environment variables.
Variables extracted from environment:
- `AZURE_STORAGE_ACCOUNT_NAME`: storage account name
- `AZURE_STORAGE_ACCOUNT_KEY`: storage account master key
- `AZURE_STORAGE_ACCESS_KEY`: alias for `AZURE_STORAGE_ACCOUNT_KEY`
- `AZURE_STORAGE_CLIENT_ID` -> client id for service principal authorization
- `AZURE_STORAGE_CLIENT_SECRET` -> client secret for service principal authorization
- `AZURE_STORAGE_TENANT_ID` -> tenant id used in oauth flows
Args:
container: _description_
container: the name of the container.
Keyword Args:
config: Azure Configuration. Values in this config will override values inferred from the url. Defaults to None.
Expand Down
51 changes: 16 additions & 35 deletions obstore/python/obstore/store/_gcs.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,21 @@ class GCSConfig(TypedDict, total=False):
"""Path to the service account file."""

class GCSStore:
"""Configure a connection to Google Cloud Storage.
"""Interface to Google Cloud Storage.
All constructors will check for environment variables. All environment variables
starting with `GOOGLE_` will be evaluated. Names must match keys from
[`GCSConfig`][obstore.store.GCSConfig]. Only upper-case environment variables are
accepted.
Some examples of variables extracted from environment:
- `GOOGLE_SERVICE_ACCOUNT`: location of service account file
- `GOOGLE_SERVICE_ACCOUNT_PATH`: (alias) location of service account file
- `SERVICE_ACCOUNT`: (alias) location of service account file
- `GOOGLE_SERVICE_ACCOUNT_KEY`: JSON serialized service account key
- `GOOGLE_BUCKET`: bucket name
- `GOOGLE_BUCKET_NAME`: (alias) bucket name
If no credentials are explicitly provided, they will be sourced from the environment
as documented
Expand All @@ -69,7 +83,7 @@ class GCSStore:

def __init__(
self,
bucket: str,
bucket: str | None = None,
*,
config: GCSConfig | None = None,
client_options: ClientConfig | None = None,
Expand All @@ -90,39 +104,6 @@ class GCSStore:
GCSStore
"""

@classmethod
def from_env(
cls,
bucket: str,
*,
config: GCSConfig | None = None,
client_options: ClientConfig | None = None,
retry_config: RetryConfig | None = None,
**kwargs: Unpack[GCSConfig],
) -> GCSStore:
"""Construct a new GCSStore with values pre-populated from environment variables.
Variables extracted from environment:
- `GOOGLE_SERVICE_ACCOUNT`: location of service account file
- `GOOGLE_SERVICE_ACCOUNT_PATH`: (alias) location of service account file
- `SERVICE_ACCOUNT`: (alias) location of service account file
- `GOOGLE_SERVICE_ACCOUNT_KEY`: JSON serialized service account key
- `GOOGLE_BUCKET`: bucket name
- `GOOGLE_BUCKET_NAME`: (alias) bucket name
Args:
bucket: The GCS bucket to use.
Keyword Args:
config: GCS Configuration. Values in this config will override values inferred from the environment. Defaults to None.
client_options: HTTP Client options. Defaults to None.
retry_config: Retry configuration. Defaults to None.
Returns:
GCSStore
"""

@classmethod
def from_url(
cls,
Expand Down
31 changes: 5 additions & 26 deletions pyo3-object_store/src/aws.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,32 +34,8 @@ impl PyS3Store {
impl PyS3Store {
// Create from parameters
#[new]
#[pyo3(signature = (bucket, *, config=None, client_options=None, retry_config=None, **kwargs))]
fn new(
bucket: String,
config: Option<PyAmazonS3Config>,
client_options: Option<PyClientOptions>,
retry_config: Option<PyRetryConfig>,
kwargs: Option<PyAmazonS3Config>,
) -> PyObjectStoreResult<Self> {
let mut builder = AmazonS3Builder::new().with_bucket_name(bucket);
if let Some(config_kwargs) = combine_config_kwargs(config, kwargs)? {
builder = config_kwargs.apply_config(builder);
}
if let Some(client_options) = client_options {
builder = builder.with_client_options(client_options.into())
}
if let Some(retry_config) = retry_config {
builder = builder.with_retry(retry_config.into())
}
Ok(Self(Arc::new(builder.build()?)))
}

// Create from env variables
#[classmethod]
#[pyo3(signature = (bucket=None, *, config=None, client_options=None, retry_config=None, **kwargs))]
fn from_env(
_cls: &Bound<PyType>,
fn new(
bucket: Option<String>,
config: Option<PyAmazonS3Config>,
client_options: Option<PyClientOptions>,
Expand Down Expand Up @@ -116,7 +92,10 @@ impl PyS3Store {
.getattr(intern!(py, "token"))?
.extract::<Option<String>>()?;

let mut builder = AmazonS3Builder::new().with_bucket_name(bucket);
// We read env variables because even though boto3.Session reads env variables itself,
// there may be more variables set than just authentication. Regardless, any variables set
// by the environment will be overwritten below if they exist/were passed in.
let mut builder = AmazonS3Builder::from_env().with_bucket_name(bucket);
if let Some(region) = region {
builder = builder.with_region(region);
}
Expand Down
31 changes: 5 additions & 26 deletions pyo3-object_store/src/azure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,39 +33,18 @@ impl PyAzureStore {
impl PyAzureStore {
// Create from parameters
#[new]
#[pyo3(signature = (container, *, config=None, client_options=None, retry_config=None, **kwargs))]
#[pyo3(signature = (container=None, *, config=None, client_options=None, retry_config=None, **kwargs))]
fn new(
container: String,
container: Option<String>,
config: Option<PyAzureConfig>,
client_options: Option<PyClientOptions>,
retry_config: Option<PyRetryConfig>,
kwargs: Option<PyAzureConfig>,
) -> PyObjectStoreResult<Self> {
let mut builder = MicrosoftAzureBuilder::new().with_container_name(container);
if let Some(config_kwargs) = combine_config_kwargs(config, kwargs)? {
builder = config_kwargs.apply_config(builder);
}
if let Some(client_options) = client_options {
builder = builder.with_client_options(client_options.into())
let mut builder = MicrosoftAzureBuilder::from_env();
if let Some(container) = container {
builder = builder.with_container_name(container);
}
if let Some(retry_config) = retry_config {
builder = builder.with_retry(retry_config.into())
}
Ok(Self(Arc::new(builder.build()?)))
}

// Create from env variables
#[classmethod]
#[pyo3(signature = (container, *, config=None, client_options=None, retry_config=None, **kwargs))]
fn from_env(
_cls: &Bound<PyType>,
container: String,
config: Option<PyAzureConfig>,
client_options: Option<PyClientOptions>,
retry_config: Option<PyRetryConfig>,
kwargs: Option<PyAzureConfig>,
) -> PyObjectStoreResult<Self> {
let mut builder = MicrosoftAzureBuilder::from_env().with_container_name(container);
if let Some(config_kwargs) = combine_config_kwargs(config, kwargs)? {
builder = config_kwargs.apply_config(builder);
}
Expand Down
31 changes: 5 additions & 26 deletions pyo3-object_store/src/gcp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,39 +33,18 @@ impl PyGCSStore {
impl PyGCSStore {
// Create from parameters
#[new]
#[pyo3(signature = (bucket, *, config=None, client_options=None, retry_config=None, **kwargs))]
#[pyo3(signature = (bucket=None, *, config=None, client_options=None, retry_config=None, **kwargs))]
fn new(
bucket: String,
bucket: Option<String>,
config: Option<PyGoogleConfig>,
client_options: Option<PyClientOptions>,
retry_config: Option<PyRetryConfig>,
kwargs: Option<PyGoogleConfig>,
) -> PyObjectStoreResult<Self> {
let mut builder = GoogleCloudStorageBuilder::new().with_bucket_name(bucket);
if let Some(config_kwargs) = combine_config_kwargs(config, kwargs)? {
builder = config_kwargs.apply_config(builder);
}
if let Some(client_options) = client_options {
builder = builder.with_client_options(client_options.into())
let mut builder = GoogleCloudStorageBuilder::from_env();
if let Some(bucket) = bucket {
builder = builder.with_bucket_name(bucket);
}
if let Some(retry_config) = retry_config {
builder = builder.with_retry(retry_config.into())
}
Ok(Self(Arc::new(builder.build()?)))
}

// Create from env variables
#[classmethod]
#[pyo3(signature = (bucket, *, config=None, client_options=None, retry_config=None, **kwargs))]
fn from_env(
_cls: &Bound<PyType>,
bucket: String,
config: Option<PyGoogleConfig>,
client_options: Option<PyClientOptions>,
retry_config: Option<PyRetryConfig>,
kwargs: Option<PyGoogleConfig>,
) -> PyObjectStoreResult<Self> {
let mut builder = GoogleCloudStorageBuilder::from_env().with_bucket_name(bucket);
if let Some(config_kwargs) = combine_config_kwargs(config, kwargs)? {
builder = config_kwargs.apply_config(builder);
}
Expand Down
Loading

0 comments on commit ffa3dd9

Please sign in to comment.