From 7344f1d7b4a1fbdc1a55c778965175e14a289913 Mon Sep 17 00:00:00 2001 From: adam Date: Tue, 2 Jan 2024 11:23:52 -0700 Subject: [PATCH 1/4] Add modernized tox environments/linting --- README.md | 8 +- docs/source/auth-providers.md | 133 ++-- docs/source/conf.py | 34 +- docs/source/configuration.md | 42 +- docs/source/installation.md | 10 +- docs/source/jwt-auth-guide.md | 94 +-- docs/source/multipart-spec.md | 120 ++-- docs/source/quickstart.md | 70 +-- docs/source/storage-backends.md | 30 +- docs/source/transfer-adapters.md | 32 +- docs/source/using-gcs.md | 74 +-- docs/source/wsgi-middleware.md | 38 +- giftless/app.py | 21 +- giftless/auth/__init__.py | 87 +-- giftless/auth/allow_anon.py | 12 +- giftless/auth/identity.py | 61 +- giftless/auth/jwt.py | 225 ++++--- giftless/config.py | 70 ++- giftless/error_handling.py | 6 +- giftless/exc.py | 2 +- giftless/representation.py | 12 +- giftless/schema.py | 27 +- giftless/storage/__init__.py | 53 +- giftless/storage/amazon_s3.py | 99 +-- giftless/storage/azure.py | 268 ++++++--- giftless/storage/exc.py | 7 +- giftless/storage/google_cloud.py | 118 ++-- giftless/storage/local_storage.py | 28 +- giftless/transfer/__init__.py | 82 ++- giftless/transfer/basic_external.py | 70 ++- giftless/transfer/basic_streaming.py | 139 +++-- giftless/transfer/multipart.py | 83 ++- giftless/transfer/types.py | 4 +- giftless/util.py | 16 +- giftless/view.py | 53 +- pytest.ini | 2 +- setup.py | 38 +- tests/auth/test_auth.py | 264 ++++++-- tests/auth/test_jwt.py | 568 ++++++++++++------ tests/conftest.py | 27 +- tests/helpers.py | 14 +- tests/storage/__init__.py | 93 +-- tests/storage/test_amazon_s3.py | 35 +- tests/storage/test_azure.py | 44 +- tests/storage/test_google_cloud.py | 135 +++-- tests/storage/test_local.py | 11 +- tests/test_batch_api.py | 222 ++++--- tests/test_error_responses.py | 34 +- tests/test_middleware.py | 81 ++- tests/test_schema.py | 56 +- tests/transfer/conftest.py | 3 +- 
tests/transfer/test_basic_external_adapter.py | 168 +++--- tests/transfer/test_module.py | 21 +- tox.ini | 40 +- 54 files changed, 2462 insertions(+), 1622 deletions(-) diff --git a/README.md b/README.md index 747815e..a12bd63 100644 --- a/README.md +++ b/README.md @@ -20,11 +20,11 @@ storage backends: In addition, Giftless implements a custom transfer mode called `multipart-basic`, which is designed to take advantage of many vendors' multipart upload -capabilities. It requires a specialized Git LFS client to use, and is currently -not supported by standard Git LFS. +capabilities. It requires a specialized Git LFS client to use, and is currently +not supported by standard Git LFS. See the [giftless-client](https://github.com/datopian/giftless-client) project -for a compatible Python Git LFS client. +for a compatible Python Git LFS client. Additional transfer modes and storage backends could easily be added and configured. @@ -34,7 +34,7 @@ configured. Documentation ------------- * [Installation Guide](https://giftless.datopian.com/en/latest/installation.html) -* [Getting Started](https://giftless.datopian.com/en/latest/quickstart.html) +* [Getting Started](https://giftless.datopian.com/en/latest/quickstart.html) * [Full Documentation](https://giftless.datopian.com/en/latest/) * [Developer Guide](https://giftless.datopian.com/en/latest/development.html) diff --git a/docs/source/auth-providers.md b/docs/source/auth-providers.md index 0f2fe9c..e5ca201 100644 --- a/docs/source/auth-providers.md +++ b/docs/source/auth-providers.md @@ -2,18 +2,18 @@ Authentication and Authorization Providers ========================================== ## Overview -Authentication and authorization in Giftless are pluggable and can easily be customized. -While Giftless typically bundles together code that handles both authentication and to +Authentication and authorization in Giftless are pluggable and can easily be customized. 
+While Giftless typically bundles together code that handles both authentication and to some degree authorization, the two concepts should be understood separately first in order -to understand how they are handled by Giftless. +to understand how they are handled by Giftless. -* *Authentication* (sometimes abbreviated here and in the code as `authn`) relates to +* *Authentication* (sometimes abbreviated here and in the code as `authn`) relates to validating the identity of the entity (person or machine) sending a request to Giftless -* *Authorization* (sometimes abbreviated as `authz`) relates to deciding, once an -identity has been established, whether the requesting party is permitted to perform -the requested operation +* *Authorization* (sometimes abbreviated as `authz`) relates to deciding, once an +identity has been established, whether the requesting party is permitted to perform +the requested operation -``` note:: In this guide and elsewhere we may refer to *auth* as a way of referring to +``` note:: In this guide and elsewhere we may refer to *auth* as a way of referring to both authentication and authorization in general, or where distinction between the two concepts is not important. ``` @@ -24,54 +24,54 @@ Giftless provides the following authentication and authorization modules by defa * `giftless.auth.jwt:JWTAuthenticator` - uses [JWT tokens](https://jwt.io/) to both identify the user and grant permissions based on scopes embedded in the token payload. * `giftless.auth.allow_anon:read_only` - grants read-only permissions on everything to every - request; Typically, this is only useful in testing environments or in very limited + request; Typically, this is only useful in testing environments or in very limited deployments. 
* `giftless.auth.allow_anon:read_write` - grants full permissions on everything to every - request; Typically, this is only useful in testing environments or in very limited + request; Typically, this is only useful in testing environments or in very limited deployments. ## Configuring Authenticators -Giftless allows you to specify one or more auth module via the `AUTH_PROVIDERS` configuration -key. This accepts a *list* of one or more auth modules. When a request comes in, auth modules will -be invoked by order, one by one, until an identity is established. +Giftless allows you to specify one or more auth module via the `AUTH_PROVIDERS` configuration +key. This accepts a *list* of one or more auth modules. When a request comes in, auth modules will +be invoked by order, one by one, until an identity is established. For example: ```yaml AUTH_PROVIDERS: - factory: giftless.auth.jwt:factory - options: + options: algorithm: HS256 private_key: s3cret,don'ttellany0ne - giftless.auth.allow_anon:read_only ``` -The config block above defines 2 auth providers: first, the `JWT` auth provider will be +The config block above defines 2 auth providers: first, the `JWT` auth provider will be tried. If it manages to produce an identity (i.e. the request contains an acceptable JWT token), it will be used. If the request does not cotain a `JWT` token, Giftless will fall back to the next provider - in this case, the `allow_anon:read_only` provider which will -allow read-only access to anyone. +allow read-only access to anyone. This allows servers to be set up to accept different authorization paradigms. 
You'll notice that each item in the `AUTH_PROVIDERS` list can be either an object with -`factory` and `options` keys - in which case Giftless will load the auth module by -calling the `factory` Python callable (in the example above, the `factory` function in -the `giftless.auth.jwt` Python module); Or, in simpler cases, it can be just a string +`factory` and `options` keys - in which case Giftless will load the auth module by +calling the `factory` Python callable (in the example above, the `factory` function in +the `giftless.auth.jwt` Python module); Or, in simpler cases, it can be just a string (as in the case of our 2nd provider), which will be treated as a `factory` value with no options. -Read below for the `options` possible for specific auth modules. +Read below for the `options` possible for specific auth modules. ## JWT Authenticator This authenticator authenticates users by accepting a well-formed [JWT token](https://jwt.io/) -in the Authorization header as a Bearer type token, or as the value of the `?jwt=` query -parameter. Tokens must be signed by the right key, and also match in terms of audience, +in the Authorization header as a Bearer type token, or as the value of the `?jwt=` query +parameter. Tokens must be signed by the right key, and also match in terms of audience, issuer and key ID if configured, and of course have valid course expiry / not before times. ### Piggybacking on `Basic` HTTP auth The JWT authenticator will also accept JWT tokens as the password for the `_jwt` user in `Basic` HTTP `Authorization` header payload. This is designed to allow easier integration with clients that only support -Basic HTTP authentication. +Basic HTTP authentication. You can disable this functionality or change the expected username using the `basic_auth_user` configuration option. @@ -80,41 +80,41 @@ The following options are available for the `jwt` auth module: * `algorithm` (`str`): JWT algorithm to use, e.g. `HS256` (default) or `RS256`. 
Must match the algorithm used by your token provider -* `public_key` (`str`): Public key string, used to verify tokens signed with any asymmetric algorithm (i.e. all +* `public_key` (`str`): Public key string, used to verify tokens signed with any asymmetric algorithm (i.e. all algorithms except `HS*`); Optional, not needed if a symmetric algorithm is in use. -* `public_key_file` (`str`): Path to file containing the public key. Specify as an alternative to `public_key`. -* `private_key` (`str`): Private key string, used to verify tokens signed with a symmetric algorithm (i.e. `HS*`); - Optional, not needed if an asymmetric algorithm is in use. -* `public_key_file` (`str`): Path to file containing the private key. Specify as an alternative to `private_key`. +* `public_key_file` (`str`): Path to file containing the public key. Specify as an alternative to `public_key`. +* `private_key` (`str`): Private key string, used to verify tokens signed with a symmetric algorithm (i.e. `HS*`); + Optional, not needed if an asymmetric algorithm is in use. +* `private_key_file` (`str`): Path to file containing the private key. Specify as an alternative to `private_key`. * `leeway` (`int`): Key expiry time leeway in seconds (default is 60); This allows for a small clock time skew between the key provider and Giftless server -* `key_id` (`str`): Optional key ID string. If provided, only keys with this ID will be accepted. +* `key_id` (`str`): Optional key ID string. If provided, only keys with this ID will be accepted. * `basic_auth_user` (`str`): Optional HTTP Basic authentication username to look for when piggybacking on Basic - authentication. Default is `_jwt`. Can be set to `None` to disable inspecting `Basic` auth headers. + authentication. Default is `_jwt`. Can be set to `None` to disable inspecting `Basic` auth headers. 
#### Options only used when module used for generating JWT tokens -The following options are currently only in use when the module is used for generating tokens for +The following options are currently only in use when the module is used for generating tokens for self-signed requests (i.e. not as an `AUTH_PROVIDER`, but as a `PRE_AUTHORIZED_ACTION_PROVIDER`): -* `default_lifetime` (`int`): lifetime of token in seconds +* `default_lifetime` (`int`): lifetime of token in seconds * `issuer` (`str`): token issuer (optional) * `audience` (`str`): token audience (optional) ### JWT Authentication Flow A typical flow for JWT is: -0. There is an external *trusted* system that can generate and sign JWT tokens and +0. There is an external *trusted* system that can generate and sign JWT tokens and Giftless is configured to verify and accept tokens signed by this system 1. User is logged in to this external system -2. A JWT token is generated and signed by this system, granting permission to specific +2. A JWT token is generated and signed by this system, granting permission to specific scopes applicable to Giftless 3. The user sends the JWT token along with any request to Giftless, using either the `Authorization: Bearer ...` header or the `?jwt=...` query parameter -4. Giftless validates and decodes the token, and proceeds to grant permissions -based on the `scopes` claim embedded in the token. +4. Giftless validates and decodes the token, and proceeds to grant permissions +based on the `scopes` claim embedded in the token. -To clarify, it is up to the 3rd party identity / authorization provider to decide, -based on the known user identity, what scopes to grant. +To clarify, it is up to the 3rd party identity / authorization provider to decide, +based on the known user identity, what scopes to grant. 
### Scopes Beyond authentication, JWT tokens may also include authorization payload @@ -131,15 +131,15 @@ or: Where: -* `{org}` is the organization of the target object -* `{repo}` is the repository of the target object. Omitting or replacing with `*` +* `{org}` is the organization of the target object +* `{repo}` is the repository of the target object. Omitting or replacing with `*` designates we are granting access to all repositories in the organization -* `{oid}` is the Object ID. Omitting or replacing with `*` designates we are granting +* `{oid}` is the Object ID. Omitting or replacing with `*` designates we are granting access to all objects in the repository -* `{subscope}` can be `metadata` or omitted entirely. If `metadata` is specified, - the scope does not grant access to actual files, but to metadata only - e.g. objects +* `{subscope}` can be `metadata` or omitted entirely. If `metadata` is specified, + the scope does not grant access to actual files, but to metadata only - e.g. objects can be verified to exist but not downloaded. -* `{actions}` is a comma separated list of allowed actions. Actions can be `read`, `write` +* `{actions}` is a comma separated list of allowed actions. Actions can be `read`, `write` or `verify`. If omitted or replaced with a `*`, all actions are permitted. ### Examples @@ -193,27 +193,27 @@ servers. ## Understanding Authentication and Authorization Providers -This part is more abstract, and will help you understand how Giftless handles -authentication and authorization in general. If you want to create a custom auth -module, or better understand how provided auth modules work, read on. +This part is more abstract, and will help you understand how Giftless handles +authentication and authorization in general. If you want to create a custom auth +module, or better understand how provided auth modules work, read on. 
Giftless' authentication and authorization module defines two key interfaces for handling authentication and authorization: ### Authenticators -Authenticator classes are subclasses of `giftless.auth.Authenticator`. One or more -authenticators can be configured at runtime, and each authenticator can try to obtain a -valid user identity from a given HTTP request. +Authenticator classes are subclasses of `giftless.auth.Authenticator`. One or more +authenticators can be configured at runtime, and each authenticator can try to obtain a +valid user identity from a given HTTP request. Once an identity has been established, an `Identity` (see below) object will be returned, and it is the role of the Authenticator class to populate this object with information about -the user, such as their name and email, and potentially, information on granted permissions. +the user, such as their name and email, and potentially, information on granted permissions. -Multiple authenticators can be chained, so that if one authenticator cannot find a valid +Multiple authenticators can be chained, so that if one authenticator cannot find a valid identity in the request, the next authenticator will be called. If no authenticator manages -to return a valid identity, by default a `401 Unauthorized` response will be returned for +to return a valid identity, by default a `401 Unauthorized` response will be returned for any action, but this behavior can be modified via the `@Authentication.no_identity_handler` -decorator. +decorator. 
### Identity Very simply, an `Identity` object encapsulates information about the current user making the @@ -225,27 +225,32 @@ class Identity: id: Optional[str] = None email: Optional[str] = None - def is_authorized(self, organization: str, repo: str, permission: Permission, oid: Optional[str] = None) -> bool: - """Tell if user is authorized to perform an operation on an object / repo - """ + def is_authorized( + self, + organization: str, + repo: str, + permission: Permission, + oid: Optional[str] = None, + ) -> bool: + """Tell if user is authorized to perform an operation on an object / repo""" pass ``` -Most notably, the `is_authorized` method will be used to tell whether the user, represented by +Most notably, the `is_authorized` method will be used to tell whether the user, represented by the Identity object, is authorized to perform an action (one of the `Permission` values specified -below) on a given entity. +below) on a given entity. -Authorizer classes may use the default built-in `DefaultIdentity`, or implement an `Identity` -subclass of their own. +Authorizer classes may use the default built-in `DefaultIdentity`, or implement an `Identity` +subclass of their own. #### Permissions Giftless defines the following permissions on entites: ```python class Permission(Enum): - READ = 'read' - READ_META = 'read-meta' - WRITE = 'write' + READ = "read" + READ_META = "read-meta" + WRITE = "write" ``` For example, if `Permission.WRITE` is granted on an object or a repository, the user will diff --git a/docs/source/conf.py b/docs/source/conf.py index 9ceb3b2..2a116e1 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -20,12 +20,12 @@ # -- Project information ----------------------------------------------------- -project = 'giftless' -copyright = '2020, Datopian / Viderum Inc.' -author = 'Shahar Evron' +project = "giftless" +copyright = "2020, Datopian / Viderum Inc." 
+author = "Shahar Evron" # The full version, including alpha/beta/rc tags -with open(os.path.join(os.path.dirname(__file__), '..', '..', 'VERSION')) as f: +with open(os.path.join(os.path.dirname(__file__), "..", "..", "VERSION")) as f: release = f.read().strip() @@ -35,14 +35,14 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'recommonmark', - 'sphinx.ext.autodoc', - 'sphinx.ext.autosectionlabel', - 'sphinx_autodoc_typehints' + "recommonmark", + "sphinx.ext.autodoc", + "sphinx.ext.autosectionlabel", + "sphinx_autodoc_typehints", ] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. @@ -55,12 +55,12 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'furo' +html_theme = "furo" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". 
-html_static_path = ['_static'] +html_static_path = ["_static"] # Prefix document path to section labels, otherwise autogenerated labels would look like 'heading' @@ -69,8 +69,12 @@ def setup(app): - app.add_config_value('recommonmark_config', { - 'known_url_schemes': ['http', 'https', 'mailto'], - 'auto_toc_tree_section': 'Contents', - }, True) + app.add_config_value( + "recommonmark_config", + { + "known_url_schemes": ["http", "https", "mailto"], + "auto_toc_tree_section": "Contents", + }, + True, + ) app.add_transform(AutoStructify) diff --git a/docs/source/configuration.md b/docs/source/configuration.md index 178cb7a..735082b 100644 --- a/docs/source/configuration.md +++ b/docs/source/configuration.md @@ -2,7 +2,7 @@ Runtime Configuration ===================== ## Passing Configuration Options -Giftless can be configured by pointing it to a [`YAML`](https://yaml.org/) +Giftless can be configured by pointing it to a [`YAML`](https://yaml.org/) configuration file when starting, or through the use of environment variables. ```note:: Changes to any configuration options will only take effect when Giftless is restarted. 
@@ -21,12 +21,12 @@ EOF # start giftless export GIFTLESS_CONFIG_FILE=giftless.conf.yaml -uwsgi --module giftless.wsgi_entrypoint --callable app --http 127.0.0.1:8080 +uwsgi --module giftless.wsgi_entrypoint --callable app --http 127.0.0.1:8080 ``` ### As a YAML / JSON string passed as environment variable If you prefer not to use a configuration file, you can pass the same YAML content as the -value of the `GIFTLESS_CONFIG_STR` environment variable: +value of the `GIFTLESS_CONFIG_STR` environment variable: ```shell export GIFTLESS_CONFIG_STR=" @@ -44,14 +44,14 @@ export GIFTLESS_CONFIG_STR='{"AUTH_PROVIDERS":["giftless.auth.allow_anon:read_wr # Proceed to start Giftless ``` -```important:: +```important:: If you provide both a YAML file (as ``GIFTLESS_CONFIG_FILE``) and a literal YAML string (as ``GIFTLESS_CONFIG_STR``), the two will be merged, with values from the YAML string taking precedence over values from the YAML file. ``` ### By overriding specific options using environment variables -You can override some specific configuration options using environment variables, by +You can override some specific configuration options using environment variables, by exporting an environment variable that starts with `GIFTLESS_CONFIG_` and appends configuration object keys separated by underscores. @@ -77,54 +77,54 @@ GIFTLESS_CONFIG_TRANSFER_ADAPTERS_BASIC_OPTIONS_STORAGE_CLASS="mymodule:CustomSt ``` ### Using a `.env` file -If Giftless is started from a working directory that has a `.env` file, it will be loaded when Giftless is started -and used to set environment variables. +If Giftless is started from a working directory that has a `.env` file, it will be loaded when Giftless is started +and used to set environment variables. ## Configuration Options The following configuration options are accepted by Giftless: #### `TRANSFER_ADAPTERS` A set of transfer mode name -> transfer adapter configuration pairs. 
Controls transfer adapters and the storage backends -used by them. +used by them. -See the [Transfer Adapters](transfer-adapters.md) section for a full list of built-in transfer adapters and their -respective options. +See the [Transfer Adapters](transfer-adapters.md) section for a full list of built-in transfer adapters and their +respective options. You can configure multiple Git LFS transfer modes, each with its own transfer adapter and configuration. -The only transfer mode that is configured by default, and that is required by the Git LFS standard, is -`basic` mode. +The only transfer mode that is configured by default, and that is required by the Git LFS standard, is +`basic` mode. Each transfer adapter configuration value is an object with two keys: -* `factory` (required) - a string referencing a Python callable, in the form `package.module.submodule:callable`. +* `factory` (required) - a string referencing a Python callable, in the form `package.module.submodule:callable`. This callable should either be an adapter class, or a factory callable that returns an adapter instance. * `options` (optional) - a key-value dictionary of options to pass to the callable above. #### `AUTH_PROVIDERS` -An ordered list of authentication and authorization adapters to load. Each adapter can have different -options. +An ordered list of authentication and authorization adapters to load. Each adapter can have different +options. Auth providers are evaluated in the order that they are configured when a request is received, until one of them provides Giftless with a user identity. This allows supporting more than one authentication scheme in the same Giftless -instance. +instance. -See the [Auth Providers](auth-providers.md) section for a full list of supported auth providers and their +See the [Auth Providers](auth-providers.md) section for a full list of supported auth providers and their respective options. 
-Each auth provider can be specified either as a string of the form `package.module.submodule:callable`, +Each auth provider can be specified either as a string of the form `package.module.submodule:callable`, referencing a Python callable that returns the provider instance, or as an object with the following keys: * `factory` - a string of the same form referencing a callable * `options` - key-value pairs of arguments to pass to the callable #### `MIDDLEWARE` -An ordered list of custom WSGI middleware configuration. See [Using WSGI Middleware](wsgi-middleware.md) +An ordered list of custom WSGI middleware configuration. See [Using WSGI Middleware](wsgi-middleware.md) for details and examples. #### `PRE_AUTHORIZED_ACTION_PROVIDER` -Configures an additional single, special auth provider, which implements the `PreAuthorizedActionAuthenticator` +Configures an additional single, special auth provider, which implements the `PreAuthorizedActionAuthenticator` interface. This is used by Giftless when it needs to generate URLs referencing itself, and wants to pre-authorize clients using these URLs. By default, the JWT auth provider is used here. -There is typically no need to override the default behavior. +There is typically no need to override the default behavior. #### `DEBUG` If set to `true`, enables more verbose debugging output in logs. 
diff --git a/docs/source/installation.md b/docs/source/installation.md index 23c891f..ae609a3 100644 --- a/docs/source/installation.md +++ b/docs/source/installation.md @@ -4,8 +4,8 @@ Installation / Deployment You can install and run Giftless in different ways, depending on your needs: ## Running from Docker image -Giftless is available as a Docker image available from -[Docker Hub](https://hub.docker.com/r/datopian/giftless) +Giftless is available as a Docker image available from +[Docker Hub](https://hub.docker.com/r/datopian/giftless) To run the latest version of Giftless in HTTP mode, listening on port 8080, run: @@ -16,7 +16,7 @@ $ docker run --rm -p 8080:8080 datopian/giftless \ --http 0.0.0.0:8080 ``` -This will pull the image and run it. +This will pull the image and run it. Alternatively, to run in `WSGI` mode you can run: @@ -29,7 +29,7 @@ This will require an HTTP server such as *nginx* to proxy HTTP requests to it. If you need to, you can also build the Docker image locally as described below. ## Running from Pypi package -You can install Giftless into your Python environment of choice (3.7+) using pip. +You can install Giftless into your Python environment of choice (3.7+) using pip. It is recommended to install Giftless into a virtual environment: ```shell @@ -78,4 +78,4 @@ production environment: (venv) $ ./flask-develop.sh ``` -In development mode, Giftless will be listening on +In development mode, Giftless will be listening on diff --git a/docs/source/jwt-auth-guide.md b/docs/source/jwt-auth-guide.md index d7506c5..77b56c8 100644 --- a/docs/source/jwt-auth-guide.md +++ b/docs/source/jwt-auth-guide.md @@ -1,24 +1,24 @@ Setting Up JWT Authorization ============================ -This guide shows how Giftless could be set up to accept [JWT tokens](https://jwt.io/) as a mechanism for both -authentication and authorization, and shows some examples of manually generating JWT tokens and sending them to -Giftless. 
+This guide shows how Giftless could be set up to accept [JWT tokens](https://jwt.io/) as a mechanism for both +authentication and authorization, and shows some examples of manually generating JWT tokens and sending them to +Giftless. -JWT tokens are useful because they allow an external service to verify the user's identity and grant it specific access +JWT tokens are useful because they allow an external service to verify the user's identity and grant it specific access permissions to objects stored by Giftless. The external service generates a token which can be verified by Giftless, but -Giftless does not need to have any awareness of the granting party's authentication mechanism or authorization logic. +Giftless does not need to have any awareness of the granting party's authentication mechanism or authorization logic. -It is recommended that you read the [Authentication and Authorization overview]() section in -the documentation to get yourself familiar with some basic concepts. +It is recommended that you read the [Authentication and Authorization overview]() section in +the documentation to get yourself familiar with some basic concepts. -This tutorial assumes you have at least completed the [Getting Started](quickstart.md) guide, and have Giftless -configured to store objects either locally or [in Google Cloud Storage](using-gcs.md). The code samples assume you -are running in the same directory and virtual environment in which Giftless was installed in previous tutorials. +This tutorial assumes you have at least completed the [Getting Started](quickstart.md) guide, and have Giftless +configured to store objects either locally or [in Google Cloud Storage](using-gcs.md). The code samples assume you +are running in the same directory and virtual environment in which Giftless was installed in previous tutorials. 
## Generate an RSA key pair JWT tokens can be signed and encrypted using different algorithms, but one common and often useful pattern is to use -an RSA public / private key pair. This allows distributing the public key, used for token verification, to multiple -(potentially untrusted) services while token generation is done using a secret key. +an RSA public / private key pair. This allows distributing the public key, used for token verification, to multiple +(potentially untrusted) services while token generation is done using a secret key. For this tutorial we will generate an RSA key pair: @@ -32,16 +32,16 @@ openssl rsa -in jwt-rs256.key -pubout -outform PEM -out jwt-rs256.key.pub Do not enter any passphrase when generating the private key. This will create two files in the current directory: `jwt-rsa256.key` which is the private key, and `jwt-rsa256.key.pub` -which is the public key. +which is the public key. ## Configure Giftless to use JWT -To configure Giftless to require JWT tokens in requests, modify the `AUTH_PROVIDERS` section in your Giftless config +To configure Giftless to require JWT tokens in requests, modify the `AUTH_PROVIDERS` section in your Giftless config file (`giftless.conf.yaml`) t0: ```yaml AUTH_PROVIDERS: - factory: giftless.auth.jwt:factory - options: + options: algorithm: RS256 public_key_file: jwt-rs256.key.pub ``` @@ -55,11 +55,11 @@ flask run ## Generating a JWT Token We now need to generate a JWT token that both authenticates us with Giftless, and carries some authorization information -in the form of granted *scopes*. These tell Giftless what access level (e.g. read or write) the user has to which -namespaces or objects. +in the form of granted *scopes*. These tell Giftless what access level (e.g. read or write) the user has to which +namespaces or objects. -In a production setting, JWT tokens will be generated by a special-purpose authorization service. 
For the purpose of -this tutorial, we will manually generate tokens using `pyjwt` - a command line tool that comes with the +In a production setting, JWT tokens will be generated by a special-purpose authorization service. For the purpose of +this tutorial, we will manually generate tokens using `pyjwt` - a command line tool that comes with the [PyJWT](https://pyjwt.readthedocs.io/en/stable/) Python library: ```note:: you can also use the debugging tool in https://jwt.io to generate tokens @@ -73,15 +73,15 @@ echo $JWT_TOKEN ``` This generates a JWT token identifying the user as `mr-robot`. The token is valid for 1 hour, and will grant both read -and write access to every object under the `my-organization` namespace. +and write access to every object under the `my-organization` namespace. ## Using JWT tokens with Git LFS clients ### Authenticating from custom Python code -[giftless-client](https://github.com/datopian/giftless-client-js) is a Python implementation of a Git LFS client with -some Giftless specific extras such as support for the `multipart-basic` transfer mode. +[giftless-client](https://github.com/datopian/giftless-client) is a Python implementation of a Git LFS client with +some Giftless specific extras such as support for the `multipart-basic` transfer mode. 
-To use the JWT token we have just generated with `giftless-client`, let's install it into our tutorial virtual +To use the JWT token we have just generated with `giftless-client`, let's install it into our tutorial virtual environment: ```shell @@ -96,22 +96,24 @@ from giftless_client import LfsClient def main(file): - token = os.getenv('JWT_TOKEN') # assuming we set the env var above - organization = 'my-organization' - repo = 'my-repo' - + token = os.getenv("JWT_TOKEN") # assuming we set the env var above + organization = "my-organization" + repo = "my-repo" + client = LfsClient( - lfs_server_url='http://127.0.0.1:5000', # Git LFS server URL - auth_token=token # JWT token + lfs_server_url="http://127.0.0.1:5000", # Git LFS server URL + auth_token=token, # JWT token ) - + result = client.upload(file, organization, repo) - print(f"Upload complete: Object ID {result['oid']}, {result['size']} bytes") + print( + f"Upload complete: Object ID {result['oid']}, {result['size']} bytes" + ) -if __name__ == '__main__': - with open(sys.argv[1], 'rb') as f: - main(f) +if __name__ == "__main__": + with open(sys.argv[1], "rb") as f: + main(f) ``` Assuming you have set the `JWT_TOKEN` environment variable as described above, run it using: @@ -135,16 +137,16 @@ Upload complete: Object ID 7e3dd874a5475c946e441e17181cfcd8fac7760bb373d38b93aee ``` ### Authenticating from Web-based Clients -One advantage of using JWT tokens for authentication and authorization is that they can be securely handed to Web -clients, which in turn can use them to upload and download files securely from LFS-managed storage, directly from a Web +One advantage of using JWT tokens for authentication and authorization is that they can be securely handed to Web +clients, which in turn can use them to upload and download files securely from LFS-managed storage, directly from a Web application. 
While we will not be demonstrating using Giftless to upload and download files from a browser, you can do this using -JWT tokens to authenticate. [giftless-client-js](https://github.com/datopian/giftless-client-js) is a JavaScript client -library which implements the Git LFS protocol with some Giftless specific "extras". You can look at the documentation -of this library to see some examples of how to do this. +JWT tokens to authenticate. [giftless-client-js](https://github.com/datopian/giftless-client-js) is a JavaScript client +library which implements the Git LFS protocol with some Giftless specific "extras". You can look at the documentation +of this library to see some examples of how to do this. -```important:: To communicate with Giftless from a browser, you will most likely need to +```important:: To communicate with Giftless from a browser, you will most likely need to :ref:`enable CORS support` in Giftless, or deploy Giftless on the same scheme / host / port which serves your Web application. ``` @@ -164,11 +166,11 @@ git push At this stage you will be asked for a username for your Giftless server. -Giftless allows passing a JWT token using the `Basic` HTTP authentication scheme. This is designed specifically to -support clients, such as `git`, that do not always support sending custom authentication tokens, and piggyback on the -`Basic` scheme which is widely supported. +Giftless allows passing a JWT token using the `Basic` HTTP authentication scheme. This is designed specifically to +support clients, such as `git`, that do not always support sending custom authentication tokens, and piggyback on the +`Basic` scheme which is widely supported. -To use this functionality with command line `git`, simply use `_jwt` as your username, and the JWT token as your +To use this functionality with command line `git`, simply use `_jwt` as your username, and the JWT token as your password. 
While we will not cover this as part of this tutorial, this functionality can be automated by configuring a @@ -176,8 +178,8 @@ While we will not cover this as part of this tutorial, this functionality can be ## Summary In this guide we have demonstrated one of Giftless' built-in authentication and authorization modes - JWT tokens. We -have covered how to configure Giftless to accept them, and demonstrated generating and using them from different -client environments. +have covered how to configure Giftless to accept them, and demonstrated generating and using them from different +client environments. Next, you can: * Learn more about the [JWT scopes accepted by Giftless](auth-providers:Scopes) diff --git a/docs/source/multipart-spec.md b/docs/source/multipart-spec.md index 8f4500d..d4d3f82 100644 --- a/docs/source/multipart-spec.md +++ b/docs/source/multipart-spec.md @@ -7,50 +7,50 @@ Date: 2020-10-09 Author: Shahar Evron ``` -This document describes the `multipart-basic` transfer mode for Git LFS. This is a protocol extension to Git LFS, +This document describes the `multipart-basic` transfer mode for Git LFS. This is a protocol extension to Git LFS, defining a new transfer mode to be implemented by Git LFS clients and servers. -Giftless is to be the first implementation of `multipart-basic`, but we hope that this transfer mode can be implemented -by other Git LFS implementations if it is found useful. +Giftless is to be the first implementation of `multipart-basic`, but we hope that this transfer mode can be implemented +by other Git LFS implementations if it is found useful. ## Reasoning -Many storage vendors and cloud vendors today offer an API to upload files in "parts" or "chunks", using multiple HTTP -requests, allowing improved stability and performance. This is especially handy when files are multiple gigabytes in -size, and a failure during the upload of a file would require re-uploading it, which could be extremely time consuming. 
+Many storage vendors and cloud vendors today offer an API to upload files in "parts" or "chunks", using multiple HTTP +requests, allowing improved stability and performance. This is especially handy when files are multiple gigabytes in +size, and a failure during the upload of a file would require re-uploading it, which could be extremely time consuming. -The purpose of the `multipart-basic` transfer mode is to allow Git LFS servers and client facilitate direct-to-storage -uploads for backends supporting multipart or chunked uploads. +The purpose of the `multipart-basic` transfer mode is to allow Git LFS servers and client facilitate direct-to-storage +uploads for backends supporting multipart or chunked uploads. -As the APIs offered by storage vendors differ greatly, `multipart-basic` transfer mode will offer abstraction over most -of these complexities in hope of supporting as many storage vendors as possible. +As the APIs offered by storage vendors differ greatly, `multipart-basic` transfer mode will offer abstraction over most +of these complexities in hope of supporting as many storage vendors as possible. ## Terminology Throughout this document, the following terms are in use: * *LFS Server* - The HTTP server to which the LFS `batch` request is sent * *Client* or *LFS Client* - a client using the Git LFS protocol to push large files to storage via an LFS server -* *Storage Backend* - The HTTP server handling actual storage; This may or may not be the same server as the LFS +* *Storage Backend* - The HTTP server handling actual storage; This may or may not be the same server as the LFS server, and for the purpose of this document, typically it is not. A typical implementation of this protocol would have -the Storage Backend be a cloud storage service such as Amazon S3 or Google Cloud Storage. +the Storage Backend be a cloud storage service such as Amazon S3 or Google Cloud Storage. 
## Design Goals ### Must: * Abstract vendor specific API and flow into a generic protocol * Remain as close as possible to the `basic` transfer API -* Work at least with the multi-part APIs of - [Amazon S3](https://aws.amazon.com/s3/), - [Google Cloud Storage](https://cloud.google.com/storage) and - [Azure Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/), +* Work at least with the multi-part APIs of + [Amazon S3](https://aws.amazon.com/s3/), + [Google Cloud Storage](https://cloud.google.com/storage) and + [Azure Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/), ### Nice / Should: -* Define how uploads can be resumed by re-doing parts and not-redoing parts that were uploaded successfully +* Define how uploads can be resumed by re-doing parts and not-redoing parts that were uploaded successfully (this may be vendor specific and not always supported) * Offer a local storage adapter for testing purposes ## High Level Protocol Specs * The name of the transfer is `multipart-basic` -* Batch requests are the same as `basic` requests except that `{"transfers": ["multipart-basic", "basic"]}` is the - expected transfers value. Clients **must** retain `basic` as the fallback transfer mode to ensure compatiblity with +* Batch requests are the same as `basic` requests except that `{"transfers": ["multipart-basic", "basic"]}` is the + expected transfers value. Clients **must** retain `basic` as the fallback transfer mode to ensure compatibility with servers not implementing this extension.
* `{"operation": "download"}` replies work exactly like `basic` download request with no change * `{"operation": "upload"}` replies will break the upload into several `actions`: @@ -59,25 +59,25 @@ the Storage Backend be a cloud storage service such as Amazon S3 or Google Cloud * `commit` (optional), a request to finalize the upload * `abort` (optional), a request to abort the upload and clean up all unfinished chunks and state * `verify` (optional), a request to verify the file is in storage, similar to `basic` upload verify actions -* Just like `basic` transfers, if the file fully exists and is committed to storage, no `actions` will be provided +* Just like `basic` transfers, if the file fully exists and is committed to storage, no `actions` will be provided in the reply and the upload can simply be skipped -* Authentication and authorization behave just like with the `basic` protocol. +* Authentication and authorization behave just like with the `basic` protocol. ### Request Objects -The `init`, `commit`, `abort` and each one of the `parts` actions contain a "request spec". These are similar to `basic` -transfer adapter `actions` but in addition to `href`, `header` and `expires_in` may also include `method` (optional) and `body` -(optional) attributes, to indicate the HTTP request method and body. This allows the protocol to be vendor agnostic, -especially as the format of `init` and `commit` requests tends to vary greatly between storage backends. +The `init`, `commit`, `abort` and each one of the `parts` actions contain a "request spec". These are similar to `basic` +transfer adapter `actions` but in addition to `href`, `header` and `expires_in` may also include `method` (optional) and `body` +(optional) attributes, to indicate the HTTP request method and body. This allows the protocol to be vendor agnostic, +especially as the format of `init` and `commit` requests tends to vary greatly between storage backends. 
The default values for these fields depends on the action: * `init` defaults to no body and `POST` method * `commit` defaults to no body and `POST` method * `abort` defaults to no body and `POST` method -* `parts` requests default to `PUT` method and should include the file part as body, just like with `basic` transfer - adapters. +* `parts` requests default to `PUT` method and should include the file part as body, just like with `basic` transfer + adapters. -In addition, each `parts` request will include the `pos` attribute to indicate the position in bytes within the file in -which the part should begin, and `size` attribute to indicate the part size in bytes. If `pos` is omitted, default to +In addition, each `parts` request will include the `pos` attribute to indicate the position in bytes within the file in +which the part should begin, and `size` attribute to indicate the part size in bytes. If `pos` is omitted, default to `0` (beginning of the file). If `size` is omitted, default to read until the end of file. #### Request / Response Examples @@ -85,7 +85,7 @@ which the part should begin, and `size` attribute to indicate the part size in b ##### Upload Batch Request The following is a ~10mb file upload request: ```json -{ +{ "transfers": ["multipart-basic", "basic"], "operation": "upload", "objects": [ @@ -178,32 +178,32 @@ The following is a response for the same request, given an imaginary storage bac } ``` -As you can see, the `init` action is omitted as will be the case with many backend implementations (we assume -initialization, if needed, will most likely be done by the LFS server at the time of the batch request). +As you can see, the `init` action is omitted as will be the case with many backend implementations (we assume +initialization, if needed, will most likely be done by the LFS server at the time of the batch request). ### Chunk sizing It is up to the LFS server to decide the size of each file chunk. 
### Uploaded Part Digest Some storage backends will support, or even require, uploading clients to send a digest of the uploaded part as part -of the request. This is a useful capability even if not required, as it allows backends to validate each part -separately as it is uploaded. +of the request. This is a useful capability even if not required, as it allows backends to validate each part +separately as it is uploaded. -To support this, `parts` request objects may include a `want_digest` value, which may be any value specified by +To support this, `parts` request objects may include a `want_digest` value, which may be any value specified by [RFC-3230](https://tools.ietf.org/html/rfc3230) or [RFC-5843](https://tools.ietf.org/html/rfc5843) (the design for this -feature is highly inspired by these RFCs). +feature is highly inspired by these RFCs). -RFC-3230 defines `contentMD5` as a special value which tells the client to send the Content-MD5 header with an MD5 +RFC-3230 defines `contentMD5` as a special value which tells the client to send the Content-MD5 header with an MD5 digest of the payload in base64 encoding. -Other possible values include a comma-separated list of q-factor flagged algorithms, one of MD5, SHA, SHA-256 and -SHA-512. Of one or more of these are specified, the digest of the payload is to be specified by the client as part of -the Digest header, using the format specified by +Other possible values include a comma-separated list of q-factor flagged algorithms, one of MD5, SHA, SHA-256 and +SHA-512. If one or more of these are specified, the digest of the payload is to be specified by the client as part of +the Digest header, using the format specified by [RFC-3230 section 4.3.2](https://tools.ietf.org/html/rfc3230#section-4.3.1).
-Clients, when receiving a `parts` object with a `want_digest` value, must include in the request to upload the part +Clients, when receiving a `parts` object with a `want_digest` value, must include in the request to upload the part a digest of the part, using the `Content-MD5` HTTP header (if `contentMD5` is specified as a value), or `Digest` HTTP -header for any other algorithm / `want_digest` value. +header for any other algorithm / `want_digest` value. #### Digest Control Examples @@ -268,60 +268,60 @@ For each of the `init`, `commit`, `abort` and `parts` requests sent by the clien expected: * Any response with a `20x` status code is to be considered by clients as successful. This ambiguity is by design, to -support variances between vendors (which may use `200` or `201` to indicate a successful upload, for example). +support variances between vendors (which may use `200` or `201` to indicate a successful upload, for example). * Any other response is to be considered as an error, and it is up to the client to decide whether the request should -be retried or not. Implementors are encouraged to follow standard HTTP error status code guidelines. +be retried or not. Implementors are encouraged to follow standard HTTP error status code guidelines. * An error such as `HTTP 409` on `commit` requests could indicates that not all the file parts have been uploaded successfully, thus it is not possible to commit the file. In such cases, clients are encouraged to issue a new `batch` -request to see if any parts need re-uploading. +request to see if any parts need re-uploading. -* An error such as `HTTP 409` on `verify` requests typically indicates that the file could not be verified. In this -case, clients may issue an `abort` request (if an `abort` action has been specified by the server), and then retry +* An error such as `HTTP 409` on `verify` requests typically indicates that the file could not be verified. 
In this +case, clients may issue an `abort` request (if an `abort` action has been specified by the server), and then retry the entire upload. Another approach here would be to retry the `batch` request to see if any parts are missing, however -in this case clients should take special care to avoid infinite re-upload loops and fail the entire process after a +in this case clients should take special care to avoid infinite re-upload loops and fail the entire process after a small number of attempts. - + #### `batch` replies for partially uploaded content When content was already partially uploaded, the server is expected to return a normal reply but omit request and parts which do not need to be repeated. If the entire file has been uploaded, it is expected that no `actions` value will be returned, in which case clients should simply skip the upload. However, if parts of the file were successfully uploaded while others weren't, it is expected that a normal reply would -be returned, but with less `parts` to send. +be returned, but with less `parts` to send. ## Storage Backend Implementation Considerations ### Hiding initialization / commit complexities from clients While `part` requests are typically quite similar between vendors, the specifics of multipart upload initialization and -commit procedures are very specific to vendors. For this reason, in many cases, it will be up to the LFS server to -take care of initialization and commit code. This is fine, as long as actual uploaded data is sent directly to the +commit procedures are very specific to vendors. For this reason, in many cases, it will be up to the LFS server to +take care of initialization and commit code. This is fine, as long as actual uploaded data is sent directly to the storage backend. 
For example, in the case of Amazon S3: * All requests need to have an "upload ID" token which is obtained in an initial request -* When finalizing the upload, a special "commit" request need to be sent, listing all uploaded part IDs. +* When finalizing the upload, a special "commit" request needs to be sent, listing all uploaded part IDs. -These are very hard to abstract in a way that would allow clients to send them directly to the server. In addition, as +These are very hard to abstract in a way that would allow clients to send them directly to the server. In addition, as we do not want to maintain any state in the server, there is a need to make two requests when finalizing the upload: -one to fetch a list of uploaded chunks, and another to send this list to the S3 finalization endpoint. +one to fetch a list of uploaded chunks, and another to send this list to the S3 finalization endpoint. For this reason, in many cases storage backends will need to tell clients to send the `init` and `commit` requests -to the LFS server itself, where storage backend handler code will take care of initialization and finalization. It is +to the LFS server itself, where storage backend handler code will take care of initialization and finalization. It is even possible for backends to run some initialization code (such as getting an upload ID from AWS S3) during the initial `batch` request. ### Falling back to `basic` transfer for small files Using multipart upload APIs has some complexity and speed overhead, and for this reason it is recommended that servers -implement a "fallback" to `basic` transfers if the uploaded object is small enough to handle in a single part. +implement a "fallback" to `basic` transfers if the uploaded object is small enough to handle in a single part. -Clients *should* support such fallback natively, as it "rides" on existing transfer method negotiation capabilities.
+Clients *should* support such fallback natively, as it "rides" on existing transfer method negotiation capabilities. -The server must simply respond with `{"transfer": "basic", ...}`, even if `mutipart-basic` was request by the client +The server must simply respond with `{"transfer": "basic", ...}`, even if `multipart-basic` was requested by the client and *is supported* by the server in order to achieve this. ### Request Lifetime Considerations -As multipart uploads tend to require much more time than simple uploads, it is recommended to allow for longer `"expires_in"` +As multipart uploads tend to require much more time than simple uploads, it is recommended to allow for longer `"expires_in"` values than one would consider for `basic` uploads. It is possible that the process of uploading a single object in multiple parts may take several hours from `init` to `commit`. diff --git a/docs/source/quickstart.md b/docs/source/quickstart.md index d79ee24..5c6bf6d 100644 --- a/docs/source/quickstart.md +++ b/docs/source/quickstart.md @@ -1,12 +1,12 @@ Getting Started =============== -This guide will introduce you to the basics of Giftless by getting it up and running locally, and seeing how it can -interact with a local git repository. +This guide will introduce you to the basics of Giftless by getting it up and running locally, and seeing how it can +interact with a local git repository. ## Prerequisites -This tutorial assumes you have Python 3.7 or newer available as `python`. On some systems, you might need to -replace `python` with `python3`. +This tutorial assumes you have Python 3.7 or newer available as `python`. On some systems, you might need to +replace `python` with `python3`. ## Installing and Running Locally Create a new directory for our tutorial, and set up a fresh virtual environment: @@ -17,10 +17,10 @@ python -m venv .venv source .venv/bin/activate ``` -Then, proceed to [install giftless from pypi]() as described in the -installation guide.
+Then, proceed to [install giftless from pypi]() as described in the +installation guide. -```note:: For this tutorial, we will be using Flask's built-in development server. This is *not suitable* for production +```note:: For this tutorial, we will be using Flask's built-in development server. This is *not suitable* for production use. ``` @@ -31,18 +31,18 @@ export FLASK_APP=giftless.wsgi_entrypoint flask run ``` -You should see something like: +You should see something like: ```shell Running on http://127.0.0.1:5000/ (Press CTRL+C to quit) ``` This means Giftless is up and running with some default configuration on *localhost* port *5000*, with -the default configuration options. +the default configuration options. Hit Ctrl+C to stop Giftless. -## Basic Configuration +## Basic Configuration To configure Giftless, create a file named `giftless.conf.yaml` in the current directory with the following content: @@ -52,8 +52,8 @@ AUTH_PROVIDERS: - giftless.auth.allow_anon:read_write ``` -This will override the default read-only access mode, and allow open and full access to anyone, to any object stored -with Giftless. Clearly this is not useful in a production setting, but for a local test this will do fine. +This will override the default read-only access mode, and allow open and full access to anyone, to any object stored +with Giftless. Clearly this is not useful in a production setting, but for a local test this will do fine. Run Giftless again, pointing to this new configuration file: ```shell @@ -62,31 +62,31 @@ flask run ``` ## Interacting with git -We will now proceed to show how Giftless can interact with a local `git` repository, as a demonstration of how Git LFS +We will now proceed to show how Giftless can interact with a local `git` repository, as a demonstration of how Git LFS works. -Keep Giftless running and open a new terminal window or tab. +Keep Giftless running and open a new terminal window or tab. 
### Install the `lfs` Git extension -While having a local installation of `git-lfs` is not required to run Giftless, you will need -it to follow this guide. +While having a local installation of `git-lfs` is not required to run Giftless, you will need +it to follow this guide. Run: ```shell git lfs version ``` -If you see an error indicating that `'lfs' is not a git command`, follow the +If you see an error indicating that `'lfs' is not a git command`, follow the [Git LFS installation instructions here](https://git-lfs.github.com/). On Linux, you may be able -to simply install the `git-lfs` package provided by your distro. +to simply install the `git-lfs` package provided by your distro. -```important:: If you have git-lfs older than version 2.10, you will need to upgrade it to follow this tutorial, +```important:: If you have git-lfs older than version 2.10, you will need to upgrade it to follow this tutorial, otherwise you may encounter some unexpected errors. Follow the instructions linked above to upgrade to the latest - version. + version. ``` ### Create a local "remote" repository -For the purpose of this tutorial, we will create a fake "remote" git repository on your local disk. This is analogous +For the purpose of this tutorial, we will create a fake "remote" git repository on your local disk. This is analogous to a real-world remote repository such as GitHub or any other Git remote, but is simpler to set up. ```shell @@ -95,8 +95,8 @@ git init --bare cd .. ``` -Of course, you may choose to use any other remote repository instead - just remember to replace the repository URL -in the upcoming `git clone` command. +Of course, you may choose to use any other remote repository instead - just remember to replace the repository URL +in the upcoming `git clone` command. 
### Create a local repository and push some file Clone the remote repository we have just created to a local repository: @@ -119,16 +119,16 @@ repository, with the following content: *.bin filter=lfs diff=lfs merge=lfs -text ``` -Tell Git LFS where to find the Giftless server. We will do that by using the `git config` command to write to the -`.lfsconfig` file: +Tell Git LFS where to find the Giftless server. We will do that by using the `git config` command to write to the +`.lfsconfig` file: ```shell git config -f .lfsconfig lfs.url http://127.0.0.1:5000/my-organization/test-repo ``` -`my-organization/test-repo` is an organization / repository prefix under which your files will be stored. -Giftless requires all files to be stored under such prefix. +`my-organization/test-repo` is an organization / repository prefix under which your files will be stored. +Giftless requires all files to be stored under such prefix. -Tell git to track the configuration files we have just created. This will allow other users to have the same Git LFS +Tell git to track the configuration files we have just created. 
This will allow other users to have the same Git LFS configuration as us when cloning the repository: ```shell git add .gitattributes .lfsconfig @@ -137,7 +137,7 @@ Create some files and add them to git: ```shell # This README file will be committed to Git as usual echo "# This is a Giftless test" > README.md -# Let's also create a 1mb binary file which we'll want to store in Git LFS +# Let's also create a 1mb binary file which we'll want to store in Git LFS dd if=/dev/zero of=1mb-blob.bin bs=1024 count=1024 git add README.md 1mb-blob.bin ``` @@ -156,7 +156,7 @@ git push -u origin master ### See your objects stored by Giftless locally -Switch over to the shell in which Giftless is running, and you will see log messages indicating that a file has just +Switch over to the shell in which Giftless is running, and you will see log messages indicating that a file has just been pushed to storage and verified. This should be similar to: ``` @@ -165,7 +165,7 @@ INFO 127.0.0.1 - - "PUT /my-organization/test-repo/objects/storage/30e14955ebf13 INFO 127.0.0.1 - - "POST /my-organization/test-repo/objects/storage/verify HTTP/1.1" 200 - ``` -To further verify that the file has been stored by Giftless, we can list the files in our local Giftless storage +To further verify that the file has been stored by Giftless, we can list the files in our local Giftless storage directory: ```shell @@ -179,12 +179,12 @@ total 1024 ``` You will notice a 1mb file stored in `../lfs-storage/my-organization/test-repo` - this is identical to our `1mb-blob.bin` -file, but it is stored with its SHA256 digest as its name. +file, but it is stored with its SHA256 digest as its name. ## Summary You have now seen Giftless used as both a Git LFS server, and as a storage backend. This should give you a basic sense -of how to run Giftless, and how Git LFS servers interact with Git. +of how to run Giftless, and how Git LFS servers interact with Git. 
-In a real-world scenario, you would typically have Giftless serve as the Git LFS server but not as a storage backend - -storage will be off-loaded to a Cloud Storage service which has been configured for this purpose. +In a real-world scenario, you would typically have Giftless serve as the Git LFS server but not as a storage backend - +storage will be off-loaded to a Cloud Storage service which has been configured for this purpose. diff --git a/docs/source/storage-backends.md b/docs/source/storage-backends.md index f2d0ac9..eda1e66 100644 --- a/docs/source/storage-backends.md +++ b/docs/source/storage-backends.md @@ -2,20 +2,20 @@ Storage Backends ================ Storage Backend classes are responsible for managing and interacting with the -system that handles actual file storage, be it a local file system or a remote, -3rd party cloud based storage. +system that handles actual file storage, be it a local file system or a remote, +3rd party cloud based storage. Storage Adapters can implement one or more of several interfaces, which defines -the capabilities provided by the backend, and which -[transfer adapters](transfer-adapters.md) the backend can be used with. +the capabilities provided by the backend, and which +[transfer adapters](transfer-adapters.md) the backend can be used with. ## Types of Storage Backends Each storage backend adapter can implement one or more of the following interfaces: * **`StreamingStorage`** - provides APIs for streaming object upload / download -through the Giftless HTTP server. Works with the `basic_streaming` transfer -adapter. +through the Giftless HTTP server. Works with the `basic_streaming` transfer +adapter. * **`ExternalStorage`** - provides APIs for referring clients to upload / download objects using an external HTTP server. Works with the `basic_external` transfer adapter. Typically, these backends interact with Cloud Storage providers. 
@@ -36,9 +36,9 @@ TRANSFER_ADAPTERS: options: storage_class: giftless.storage.google_cloud:GoogleCloudStorage storage_options: - + # add an example here -``` +``` Built-In Storage Backends ------------------------- @@ -65,7 +65,7 @@ Modify your `giftless.yaml` file according to the following config: ### Google Cloud Storage -#### `giftless.storage.google_cloud:GoogleCloudStorage` +#### `giftless.storage.google_cloud:GoogleCloudStorage` To use Google Cloud Storage as a backend, you'll first need: * A Google Cloud Storage bucket to store objects in @@ -74,10 +74,10 @@ To use Google Cloud Storage as a backend, you'll first need: The key must be associated with either a user or a service account, and should have read / write permissions on objects in the bucket. -If you plan to access objects from a browser, your bucket needs to have +If you plan to access objects from a browser, your bucket needs to have [CORS enabled](https://cloud.google.com/storage/docs/configuring-cors). -You can deploy the account key JSON file and provide the path to it as +You can deploy the account key JSON file and provide the path to it as the `account_key_file` storage option: ```yaml @@ -93,7 +93,7 @@ TRANSFER_ADAPTERS: ``` Alternatively, you can base64-encode the contents of the JSON file and provide -it inline as `account_key_base64`: +it inline as `account_key_base64`: ```yaml TRANSFER_ADAPTERS: @@ -131,7 +131,7 @@ Modify your `giftless.yaml` file according to the following config: ``` #### boto3 authentication -`AwsS3Storage` supports 3 ways of authentication defined in more detail in +`AwsS3Storage` supports 3 ways of authentication defined in more detail in [docs](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html): 1. Environment variables 2. Shared credential file (~/.aws/credentials) @@ -148,7 +148,7 @@ $ export GIFTLESS_CONFIG_FILE=giftless.yaml You will need uWSGI running. Install it with your preferred package manager. 
Here is an example of how to run it: - + ```bash # Run uWSGI in HTTP mode on port 8080 $ uwsgi -M -T --threads 2 -p 2 --manage-script-name \ @@ -157,7 +157,7 @@ Here is an example of how to run it: #### Notes -* If you plan to access objects directly from a browser (e.g. using a JavaScript based Git LFS client library), +* If you plan to access objects directly from a browser (e.g. using a JavaScript based Git LFS client library), your GCS bucket needs to be [CORS enabled](https://cloud.google.com/storage/docs/configuring-cors). ### Local Filesystem Storage diff --git a/docs/source/transfer-adapters.md b/docs/source/transfer-adapters.md index 6c69529..08397fe 100644 --- a/docs/source/transfer-adapters.md +++ b/docs/source/transfer-adapters.md @@ -1,9 +1,9 @@ Transfer Adapters ================= -Git LFS servers and clients can implement and negotiate different -[transfer adapters](https://github.com/git-lfs/git-lfs/blob/master/docs/api/basic-transfers.md). -Typically, Git LFS will only define a `basic` transfer mode and support that. `basic` is simple -and efficient for direct-to-storage uploads for backends that support uploading using +Git LFS servers and clients can implement and negotiate different +[transfer adapters](https://github.com/git-lfs/git-lfs/blob/master/docs/api/basic-transfers.md). +Typically, Git LFS will only define a `basic` transfer mode and support that. `basic` is simple +and efficient for direct-to-storage uploads for backends that support uploading using a single `PUT` request. ## `basic` Transfer Mode @@ -12,29 +12,29 @@ a single `PUT` request. The `basic_external` transfer adapter is designed to facilitate LFS `basic` mode transfers (the default transfer mode of Git LFS) for setups in which the storage backends supports communicating directly with the Git LFS client. That is, files will be uploaded or downloaded directly from a storage service that supports HTTP `PUT` / `GET` based access, -without passing through Giftless. 
With this adapter, Giftless will not handle any file transfers - it will only be -responsible for providing the client with access to storage. +without passing through Giftless. With this adapter, Giftless will not handle any file transfers - it will only be +responsible for providing the client with access to storage. This transfer adapter works with storage adapters implementing the `ExternalStorage` storage interface - typically these -are Cloud storage service based backends. +are Cloud storage service based backends. ### Streaming `basic` transfer adapter The `basic_streaming` transfer adapter facilitates LFS `basic` mode transfers in which Giftless also handles object -upload, download and verification requests directly. This is less scalable and typically less performant than +upload, download and verification requests directly. This is less scalable and typically less performant than the `basic_external` adapter, as all data and potentially long-running HTTP requests must be passed through Giftless -and its Python runtime. However, in some situations this may be preferable to direct-to-storage HTTP requests. +and its Python runtime. However, in some situations this may be preferable to direct-to-storage HTTP requests. -`basic_streaming` supports local storage, and also streaming requests from some Cloud storage service backends such as -Azure and Google Cloud - although these tend to also support the `basic_external` transfer adapter. +`basic_streaming` supports local storage, and also streaming requests from some Cloud storage service backends such as +Azure and Google Cloud - although these tend to also support the `basic_external` transfer adapter. 
## Multipart Transfer Mode To support more complex, and especially multi-part uploads (uploads done using more than one HTTP request, each with a different part of a large file) directly to backends -that support that, Giftless adds support for a non-standard `multipart-basic` transfer +that support that, Giftless adds support for a non-standard `multipart-basic` transfer mode. **NOTE**: `basic-multipart` is a non-standard transfer mode, and will not be supported -by most Git LFS clients; For a Python implementation of a Git LFS client library that +by most Git LFS clients; For a Python implementation of a Git LFS client library that does, see [giftless-client](https://github.com/datopian/giftless-client). ### Enabling Multipart Transfer Mode @@ -60,10 +60,10 @@ interface). Currently, these are: The following additional options are available for `multipart-basic` transfer adapter: -* `action_lifetime` - The maximal lifetime in seconds for signed multipart actions; Because multipart +* `action_lifetime` - The maximal lifetime in seconds for signed multipart actions; Because multipart uploads tend to be of very large files and can easily take hours to complete, we recommend setting this -to a few hours; The default is 6 hours. +to a few hours; The default is 6 hours. * `max_part_size` - Maximal length in bytes of a single part upload. The default is 10MB. - + See the specific storage adapter for additional backend-specific configuration options to be added under `storage_options`. diff --git a/docs/source/using-gcs.md b/docs/source/using-gcs.md index f4ab871..2dd3126 100644 --- a/docs/source/using-gcs.md +++ b/docs/source/using-gcs.md @@ -1,35 +1,35 @@ Using Google Cloud Storage as Backend ===================================== -This guide will walk you through configuring Giftless to use Google Cloud Storage (GCS) as a storage backend. Using a -cloud-based storage service such as GCS is highly recommended for production workloads and large files. 
+This guide will walk you through configuring Giftless to use Google Cloud Storage (GCS) as a storage backend. Using a +cloud-based storage service such as GCS is highly recommended for production workloads and large files. Our goal will be to run a local instance of Giftless, and interact with it using local `git` just as we did in the -[quickstart guide](quickstart.md), but our LFS tracked files will be uploaded to, and downloaded from, GCS directly - +[quickstart guide](quickstart.md), but our LFS tracked files will be uploaded to, and downloaded from, GCS directly - Giftless will not be handling any file transfers. -A list of all provided storage backends is available [here](storage-backends.md). +A list of all provided storage backends is available [here](storage-backends.md). ### Prerequisites * To use GCS you will need a Google Cloud account, and a Google Cloud project. Follow the [Google Cloud Storage quickstart guide](https://cloud.google.com/storage/docs/quickstart-console) to create these. -* To follow this guide you will need to have the `gcloud` SDK installed locally and configured to use your project. -Follow the [installation guide](https://cloud.google.com/sdk/docs/install), and then [authorize your gcloud +* To follow this guide you will need to have the `gcloud` SDK installed locally and configured to use your project. +Follow the [installation guide](https://cloud.google.com/sdk/docs/install), and then [authorize your gcloud installation](https://cloud.google.com/sdk/docs/authorizing) to access your project. -* If you already had `gcloud` installed before this tutorial, make sure you have configured `gcloud` to -use the correct account and project before following this guide. +* If you already had `gcloud` installed before this tutorial, make sure you have configured `gcloud` to +use the correct account and project before following this guide. ```important:: Using Google Cloud may incur some charges. 
It is recommended to remove any resources created during this tutorial. -``` +``` ## Set up a GCS Bucket and Service Account GCS stores files (or "objects") in containers named *buckets*. Giftless will need read/write access to such a bucket via -a *service account* - a software-only account with specific permissions. +a *service account* - a software-only account with specific permissions. **NOTE**: If you are familiar with Google Cloud Storage and are only interested in configuring Giftless to use it, and -have a bucket and service account key at ready, you can skip this part. +have a bucket and service account key at the ready, you can skip this part. ### Create a GCP service account Create a GCP service account in the scope of our project: @@ -40,21 +40,21 @@ gcloud iam service-accounts create giftless-test \ ``` Then, run: ```shell -gcloud iam service-accounts list +gcloud iam service-accounts list ``` -The last command should list out the project's service account. Look for an email address +The last command should list out the project's service account. Look for an email address of the form: - giftless-test@.iam.gserviceaccount.com + giftless-test@.iam.gserviceaccount.com -This address is the identifier of the account we have just created - we will need it in the next steps. +This address is the identifier of the account we have just created - we will need it in the next steps. ### Create a GCS bucket and grant access to it Create a bucket named `giftless-storage`: ```shell -gsutil mb gs://giftless-storage +gsutil mb gs://giftless-storage ``` Then grant our service account access to the bucket: @@ -65,8 +65,8 @@ gsutil iam ch \ gs://giftless-storage ``` -Replace `giftless-test@.iam.gserviceaccount.com` with the email address copied above. +Replace `giftless-test@.iam.gserviceaccount.com` with the email address copied above.
This will grant +the account read and write access to any object in the bucket, but will not allow it to modify the bucket itself. ### Download an account key In order to authenticate as our service account, Giftless will need a GCP Account Key JSON file. This can be created @@ -80,15 +80,15 @@ gcloud iam service-accounts keys create giftless-gcp-key.json \ (again, replace `giftless-test@.iam.gserviceaccount.com` with the correct address) This will create a file in the current directory named `giftless-gcp-key.json` - this is a secret key and should not be -shared or stored in a non-secure manner. +shared or stored in a non-secure manner. ## Configure Giftless to use GCS To use Google Cloud Storage as a storage backend and have upload and download requests be sent directly to GCS without -passing through Giftless, we need to configure Giftless to use the `basic_external` transfer adapter with +passing through Giftless, we need to configure Giftless to use the `basic_external` transfer adapter with `GoogleCloudStorage` as storage backend. -Assuming you have followed the [getting started](quickstart.md) guide to set up Giftless, edit your configuration +Assuming you have followed the [getting started](quickstart.md) guide to set up Giftless, edit your configuration YAML file (previously named `giftless.conf.yaml`) and add the `TRANSFER_ADAPTERS` section: ```yaml @@ -116,27 +116,27 @@ flask run ## Upload and download files using local `git` Follow the [quick start guide section titled "Interacting with git"]() -to see that you can push LFS tracked files to your Git repository. However, you will notice a few differences: +to see that you can push LFS tracked files to your Git repository. 
However, you will notice a few differences: -* The `git push` command may be slightly slower this time, as our 1mb file is upload to Google Cloud via the Internet +* The `git push` command may be slightly slower this time, as our 1mb file is uploaded to Google Cloud via the Internet and not over the loopback network. * The Giftless logs will show only two lines, and not three - something like: - + INFO 127.0.0.1 - - "POST /my-organization/test-repo/objects/batch HTTP/1.1" 200 - INFO 127.0.0.1 - - "POST /my-organization/test-repo/objects/storage/verify HTTP/1.1" 200 - - + This is because the `PUT` request to do the actual upload was sent directly to Google Cloud by `git-lfs`, and not to - your local Giftless instance. + your local Giftless instance. * You will not see any files stored locally this time -Behind the scenes, what happens with this setup is that when the Git LFS client asks Giftless to upload an object, -Giftless will respond by providing the client with a URL to upload the file(s) to. This URL will be a pre-signed GCP -URL, allowing temporary, limited access to write the specific file to our GCP bucket. The Git LFS client will then -proceed to upload the file using that URL, and then call Giftless again to verify that the file has been uploaded -properly. +Behind the scenes, what happens with this setup is that when the Git LFS client asks Giftless to upload an object, +Giftless will respond by providing the client with a URL to upload the file(s) to. This URL will be a pre-signed GCP +URL, allowing temporary, limited access to write the specific file to our GCP bucket. The Git LFS client will then +proceed to upload the file using that URL, and then call Giftless again to verify that the file has been uploaded +properly.
### Check that your object is in GCS -You can check that the object has been uploaded to your GCS bucket by running: +You can check that the object has been uploaded to your GCS bucket by running: ```shell gsutil ls gs://giftless-storage/my-organization/test-repo/ @@ -147,7 +147,7 @@ gs://giftless-storage/my-organization/test-repo/30e14955ebf1352266dc2ff8067e6810 ``` ### Download Objects from Git LFS -To see how downloads work with Git LFS and Giftless, let's create yet another local clone of our repository. This +To see how downloads work with Git LFS and Giftless, let's create yet another local clone of our repository. This simulates another user pulling from the same repository on a different machine: ```shell @@ -158,12 +158,12 @@ cd other-repo You should now see that the `1mb-blob.bin` file exists in the other local repository, and is 1mb in size. The Gitless logs should show one more line, detailing the request made by `git-lfs` to request access to the file in storage. The -file itself has been pulled from GCS. +file itself has been pulled from GCS. ## Summary In this guide, we have seen how to configure Giftless to use GCP as a storage backend. We have seen that Giftless, and -other Git LFS servers, do not need (and in fact typically should not) serve as a file storage service, but in fact -serve as a "gateway" to our storage backend. +other Git LFS servers, do not need (and in fact typically should not) serve as a file storage service, but in fact +serve as a "gateway" to our storage backend. -The Google Cloud Storage backend has some additional options. See the full list of options for the Google Cloud +The Google Cloud Storage backend has some additional options. 
See the full list of options for the Google Cloud Storage backend [here](storage-backends.html#google-cloud-storage) diff --git a/docs/source/wsgi-middleware.md b/docs/source/wsgi-middleware.md index 9e3b532..9ae3447 100644 --- a/docs/source/wsgi-middleware.md +++ b/docs/source/wsgi-middleware.md @@ -1,9 +1,9 @@ # Using WSGI Middleware -Another way Giftless allows customizing its behavior is using standard -[WSGI middleware](https://en.wikipedia.org/wiki/Web_Server_Gateway_Interface#WSGI_middleware). +Another way Giftless allows customizing its behavior is using standard +[WSGI middleware](https://en.wikipedia.org/wiki/Web_Server_Gateway_Interface#WSGI_middleware). This includes both publicly available middleware libraries, or your own custom -WSGI middleware code. +WSGI middleware code. ## Enabling Custom WSGI Middleware @@ -20,19 +20,19 @@ Where: * `class` is a `:` reference to a WSGI module and class name, or a callable that returns a WSGI object * `args` is a list of arguments to pass to the specified callable -* `kwargs` are key-value pair of keyword arguments to pass to the specified callable. +* `kwargs` are key-value pairs of keyword arguments to pass to the specified callable. The middleware module must be installed in the same Python environment as Giftless -for it to be loaded. +for it to be loaded. ## Useful Middleware Examples -Here are some examples of solving specific needs using WSGI middleware: +Here are some examples of solving specific needs using WSGI middleware: ### HOWTO: Fixing Generated URLs when Running Behind a Proxy If you have Giftless running behind a reverse proxy, and available publicly at a custom hostname / port / path / scheme that is not known to -Giftless, you might have an issue where generated URLs are not accessible. +Giftless, you might have an issue where generated URLs are not accessible.
This can be fixed by enabling the `ProxyFix` Werkzeug middleware, which is already installed along with Giftless: @@ -46,12 +46,12 @@ MIDDLEWARE: x_prefix: 1 ``` -In order for this to work, you must ensure your reverse proxy (e.g. nginx) -sets the right `X-Forwarded-*` headers when passing requests. +In order for this to work, you must ensure your reverse proxy (e.g. nginx) +sets the right `X-Forwarded-*` headers when passing requests. -For example, if you have deployed giftless in an endpoint that is available to -clients at `https://example.com/lfs`, the following nginx configuration is -expected, in addition to the Giftless configuration set in the `MIDDLEWARE` +For example, if you have deployed giftless in an endpoint that is available to +clients at `https://example.com/lfs`, the following nginx configuration is +expected, in addition to the Giftless configuration set in the `MIDDLEWARE` section: ``` @@ -62,20 +62,20 @@ section: ``` This example assumes Giftless is available to the reverse proxy at -`giftless.internal.host` port 5000. In addition, `X-Forwarded-Host`, +`giftless.internal.host` port 5000. In addition, `X-Forwarded-Host`, `X-Forwarded-Port`, `X-Forwarded-Proto` are automatically set by nginx by -default. +default. ### HOWTO: CORS Support -If you need to access Giftless from a browser, you may need to ensure -Giftless sends proper [CORS](https://developer.mozilla.org/en-US/docs/Web/HTTP/CORS) -headers, otherwise browsers may reject responses from Giftless. +If you need to access Giftless from a browser, you may need to ensure +Giftless sends proper [CORS](https://developer.mozilla.org/en-US/docs/Web/HTTP/CORS) +headers, otherwise browsers may reject responses from Giftless. There are a number of CORS WSGI middleware implementations available on PyPI, -and you can use any of them to add CORS headers control support to Giftless. +and you can use any of them to add CORS headers control support to Giftless. 
-For example, you can enable CORS support using +For example, you can enable CORS support using [wsgi-cors-middleware](https://github.com/moritzmhmk/wsgi-cors-middleware): ```bash diff --git a/giftless/app.py b/giftless/app.py index ad6f2a8..9d768fd 100644 --- a/giftless/app.py +++ b/giftless/app.py @@ -13,20 +13,20 @@ def init_app(app=None, additional_config=None): - """Flask app initialization - """ + """Flask app initialization""" if app is None: app = Flask(__name__) config.configure(app, additional_config=additional_config) # Configure logging - if os.environ.get('GIFTLESS_DEBUG'): + if os.environ.get("GIFTLESS_DEBUG"): level = logging.DEBUG else: level = logging.WARNING - logging.basicConfig(format='%(asctime)-15s %(name)-15s %(levelname)s %(message)s', - level=level) + logging.basicConfig( + format="%(asctime)-15s %(name)-15s %(levelname)s %(message)s", level=level + ) # Load middleware _load_middleware(app) @@ -46,16 +46,15 @@ def init_app(app=None, additional_config=None): def _load_middleware(flask_app: Flask) -> None: - """Load WSGI middleware classes from configuration - """ + """Load WSGI middleware classes from configuration""" log = logging.getLogger(__name__) wsgi_app = flask_app.wsgi_app - middleware_config = flask_app.config['MIDDLEWARE'] + middleware_config = flask_app.config["MIDDLEWARE"] for spec in middleware_config: - klass = get_callable(spec['class']) - args = spec.get('args', []) - kwargs = spec.get('kwargs', {}) + klass = get_callable(spec["class"]) + args = spec.get("args", []) + kwargs = spec.get("kwargs", {}) wsgi_app = klass(wsgi_app, *args, **kwargs) log.debug("Loaded middleware: %s(*%s, **%s)", klass, args, kwargs) diff --git a/giftless/auth/__init__.py b/giftless/auth/__init__.py index f7f1ad0..7583b87 100644 --- a/giftless/auth/__init__.py +++ b/giftless/auth/__init__.py @@ -25,8 +25,11 @@ class Authenticator(Protocol): """Authenticators are callables (an object or function) that can authenticate a request and provide an identity 
object """ + def __call__(self, request: Request) -> Optional[Identity]: - raise NotImplementedError('This is a protocol definition and should not be called directly') + raise NotImplementedError( + "This is a protocol definition and should not be called directly" + ) class PreAuthorizedActionAuthenticator(abc.ABC): @@ -36,24 +39,34 @@ class PreAuthorizedActionAuthenticator(abc.ABC): They serve to both pre-authorize Git LFS actions and check these actions are authorized as they come in. """ - def get_authz_query_params(self, identity: Identity, org: str, repo: str, actions: Optional[Set[str]] = None, - oid: Optional[str] = None, lifetime: Optional[int] = None) -> Dict[str, str]: - """Authorize an action by adding credientaisl to the query string - """ + + def get_authz_query_params( + self, + identity: Identity, + org: str, + repo: str, + actions: Optional[Set[str]] = None, + oid: Optional[str] = None, + lifetime: Optional[int] = None, + ) -> Dict[str, str]: + """Authorize an action by adding credientaisl to the query string""" return {} - def get_authz_header(self, identity: Identity, org: str, repo: str, actions: Optional[Set[str]] = None, - oid: Optional[str] = None, lifetime: Optional[int] = None) -> Dict[str, str]: - """Authorize an action by adding credentials to the request headers - """ + def get_authz_header( + self, + identity: Identity, + org: str, + repo: str, + actions: Optional[Set[str]] = None, + oid: Optional[str] = None, + lifetime: Optional[int] = None, + ) -> Dict[str, str]: + """Authorize an action by adding credentials to the request headers""" return {} class Authentication: - - def __init__( - self, app=None, default_identity: Optional[Identity] = None - ) -> None: + def __init__(self, app=None, default_identity: Optional[Identity] = None) -> None: self._default_identity = default_identity self._authenticators: List[Authenticator] = [] self._unauthorized_handler: Optional[Callable] = None @@ -63,13 +76,12 @@ def __init__( self.init_app(app) 
def init_app(self, app): - """Initialize the Flask app - """ - app.config.setdefault('AUTH_PROVIDERS', []) - app.config.setdefault('PRE_AUTHORIZED_ACTION_PROVIDER', None) + """Initialize the Flask app""" + app.config.setdefault("AUTH_PROVIDERS", []) + app.config.setdefault("PRE_AUTHORIZED_ACTION_PROVIDER", None) def get_identity(self) -> Optional[Identity]: - if hasattr(g, 'user') and isinstance(g.user, Identity): + if hasattr(g, "user") and isinstance(g.user, Identity): return g.user log = logging.getLogger(__name__) @@ -82,14 +94,15 @@ def get_identity(self) -> Optional[Identity]: return None def login_required(self, f): - """A typical Flask "login_required" view decorator - """ + """A typical Flask "login_required" view decorator""" + @wraps(f) def decorated_function(*args, **kwargs): user = self.get_identity() if not user: return self.auth_failure() return f(*args, **kwargs) + return decorated_function def no_identity_handler(self, f): @@ -107,16 +120,14 @@ def decorated_func(*args, **kwargs): return decorated_func def auth_failure(self): - """Trigger an authentication failure - """ + """Trigger an authentication failure""" if self._unauthorized_handler: return self._unauthorized_handler() else: raise Unauthorized("User identity is required") def init_authenticators(self, reload=False): - """Register an authenticator function - """ + """Register an authenticator function""" if reload: self._authenticators = None @@ -124,24 +135,28 @@ def init_authenticators(self, reload=False): return log = logging.getLogger(__name__) - log.debug("Initializing authenticators, have %d authenticator(s) configured", - len(current_app.config['AUTH_PROVIDERS'])) + log.debug( + "Initializing authenticators, have %d authenticator(s) configured", + len(current_app.config["AUTH_PROVIDERS"]), + ) - self._authenticators = [_create_authenticator(a) for a in current_app.config['AUTH_PROVIDERS']] + self._authenticators = [ + _create_authenticator(a) for a in 
current_app.config["AUTH_PROVIDERS"] + ] - if current_app.config['PRE_AUTHORIZED_ACTION_PROVIDER']: + if current_app.config["PRE_AUTHORIZED_ACTION_PROVIDER"]: log.debug("Initializing pre-authorized action provider") - self.preauth_handler = _create_authenticator(current_app.config['PRE_AUTHORIZED_ACTION_PROVIDER']) + self.preauth_handler = _create_authenticator( + current_app.config["PRE_AUTHORIZED_ACTION_PROVIDER"] + ) self.push_authenticator(self.preauth_handler) def push_authenticator(self, authenticator): - """Push an authenticator at the top of the stack - """ + """Push an authenticator at the top of the stack""" self._authenticators.insert(0, authenticator) def _authenticate(self) -> Optional[Identity]: - """Call all registered authenticators until we find an identity - """ + """Call all registered authenticators until we find an identity""" self.init_authenticators() for authn in self._authenticators: try: @@ -172,9 +187,9 @@ def _create_authenticator(spec: Union[str, Dict[str, Any]]) -> Authenticator: log.debug("Creating authenticator: %s", spec) return get_callable(spec, __name__) - log.debug("Creating authenticator using factory: %s", spec['factory']) - factory = get_callable(spec['factory'], __name__) # type: Callable[..., Authenticator] - options = spec.get('options', {}) + log.debug("Creating authenticator using factory: %s", spec["factory"]) + factory = get_callable(spec["factory"], __name__) # type: Callable[..., Authenticator] + options = spec.get("options", {}) return factory(**options) diff --git a/giftless/auth/allow_anon.py b/giftless/auth/allow_anon.py index d52eef8..f1eadcc 100644 --- a/giftless/auth/allow_anon.py +++ b/giftless/auth/allow_anon.py @@ -17,25 +17,23 @@ class AnonymousUser(DefaultIdentity): - """An anonymous user object - """ + """An anonymous user object""" + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) if self.name is None: - self.name = 'anonymous' + self.name = "anonymous" def read_only(_): - 
"""Dummy authenticator that gives read-only permissions to everyone - """ + """Dummy authenticator that gives read-only permissions to everyone""" user = AnonymousUser() user.allow(permissions={Permission.READ, Permission.READ_META}) return user def read_write(_): - """Dummy authenticator that gives full permissions to everyone - """ + """Dummy authenticator that gives full permissions to everyone""" user = AnonymousUser() user.allow(permissions=Permission.all()) return user diff --git a/giftless/auth/identity.py b/giftless/auth/identity.py index c59825a..c16363e 100644 --- a/giftless/auth/identity.py +++ b/giftless/auth/identity.py @@ -5,18 +5,20 @@ class Permission(Enum): - """System wide permissions - """ - READ = 'read' - READ_META = 'read-meta' - WRITE = 'write' + """System wide permissions""" + + READ = "read" + READ_META = "read-meta" + WRITE = "write" @classmethod - def all(cls) -> Set['Permission']: + def all(cls) -> Set["Permission"]: return set(cls) -PermissionTree = Dict[Optional[str], Dict[Optional[str], Dict[Optional[str], Set[Permission]]]] +PermissionTree = Dict[ + Optional[str], Dict[Optional[str], Dict[Optional[str], Set[Permission]]] +] class Identity(ABC): @@ -25,36 +27,59 @@ class Identity(ABC): The goal of user objects is to contain some information about the user, and also to allow checking if the user is authorized to perform some actions. 
""" + name: Optional[str] = None id: Optional[str] = None email: Optional[str] = None @abstractmethod - def is_authorized(self, organization: str, repo: str, permission: Permission, oid: Optional[str] = None) -> bool: - """Tell if user is authorized to perform an operation on an object / repo - """ + def is_authorized( + self, + organization: str, + repo: str, + permission: Permission, + oid: Optional[str] = None, + ) -> bool: + """Tell if user is authorized to perform an operation on an object / repo""" pass def __repr__(self): - return '<{} id:{} name:{}>'.format(self.__class__.__name__, self.id, self.name) + return "<{} id:{} name:{}>".format(self.__class__.__name__, self.id, self.name) class DefaultIdentity(Identity): - - def __init__(self, name: Optional[str] = None, id: Optional[str] = None, email: Optional[str] = None): + def __init__( + self, + name: Optional[str] = None, + id: Optional[str] = None, + email: Optional[str] = None, + ): self.name = name self.id = id self.email = email - self._allowed: PermissionTree = defaultdict(lambda: defaultdict(lambda: defaultdict(set))) - - def allow(self, organization: Optional[str] = None, repo: Optional[str] = None, - permissions: Optional[Set[Permission]] = None, oid: Optional[str] = None): + self._allowed: PermissionTree = defaultdict( + lambda: defaultdict(lambda: defaultdict(set)) + ) + + def allow( + self, + organization: Optional[str] = None, + repo: Optional[str] = None, + permissions: Optional[Set[Permission]] = None, + oid: Optional[str] = None, + ): if permissions is None: self._allowed[organization][repo][oid] = set() else: self._allowed[organization][repo][oid].update(permissions) - def is_authorized(self, organization: str, repo: str, permission: Permission, oid: Optional[str] = None) -> bool: + def is_authorized( + self, + organization: str, + repo: str, + permission: Permission, + oid: Optional[str] = None, + ) -> bool: if organization in self._allowed: if repo in self._allowed[organization]: if oid in 
self._allowed[organization][repo]: diff --git a/giftless/auth/jwt.py b/giftless/auth/jwt.py index e1582a6..e022b23 100644 --- a/giftless/auth/jwt.py +++ b/giftless/auth/jwt.py @@ -96,15 +96,24 @@ class JWTAuthenticator(PreAuthorizedActionAuthenticator): considered when checking expiry times, to cover for clock skew between servers. """ - DEFAULT_ALGORITHM = 'HS256' + + DEFAULT_ALGORITHM = "HS256" DEFAULT_LIFETIME = 60 DEFAULT_LEEWAY = 10 - DEFAULT_BASIC_AUTH_USER = '_jwt' - - def __init__(self, private_key: Optional[Union[str, bytes]] = None, default_lifetime: int = DEFAULT_LIFETIME, - algorithm: str = DEFAULT_ALGORITHM, public_key: Optional[str] = None, issuer: Optional[str] = None, - audience: Optional[str] = None, leeway: int = DEFAULT_LEEWAY, key_id: Optional[str] = None, - basic_auth_user: Optional[str] = DEFAULT_BASIC_AUTH_USER): + DEFAULT_BASIC_AUTH_USER = "_jwt" + + def __init__( + self, + private_key: Optional[Union[str, bytes]] = None, + default_lifetime: int = DEFAULT_LIFETIME, + algorithm: str = DEFAULT_ALGORITHM, + public_key: Optional[str] = None, + issuer: Optional[str] = None, + audience: Optional[str] = None, + leeway: int = DEFAULT_LEEWAY, + key_id: Optional[str] = None, + basic_auth_user: Optional[str] = DEFAULT_BASIC_AUTH_USER, + ): self.algorithm = algorithm self.default_lifetime = default_lifetime self.leeway = leeway @@ -125,79 +134,90 @@ def __call__(self, request: Request) -> Optional[Identity]: def get_authz_header(self, *args, **kwargs) -> Dict[str, str]: token = self._generate_token_for_action(*args, **kwargs) - return {'Authorization': f'Bearer {token}'} + return {"Authorization": f"Bearer {token}"} def get_authz_query_params(self, *args, **kwargs) -> Dict[str, str]: - return {'jwt': self._generate_token_for_action(*args, **kwargs)} - - def _generate_token_for_action(self, identity: Identity, org: str, repo: str, actions: Optional[Set[str]] = None, - oid: Optional[str] = None, lifetime: Optional[int] = None) -> str: - """Generate a JWT 
token authorizing the specific requested action - """ + return {"jwt": self._generate_token_for_action(*args, **kwargs)} + + def _generate_token_for_action( + self, + identity: Identity, + org: str, + repo: str, + actions: Optional[Set[str]] = None, + oid: Optional[str] = None, + lifetime: Optional[int] = None, + ) -> str: + """Generate a JWT token authorizing the specific requested action""" token_payload: Dict[str, Any] = {"sub": identity.id} if self.issuer: - token_payload['iss'] = self.issuer + token_payload["iss"] = self.issuer if self.audience: - token_payload['aud'] = self.audience + token_payload["aud"] = self.audience if identity.email: - token_payload['email'] = identity.email + token_payload["email"] = identity.email if identity.name: - token_payload['name'] = identity.name + token_payload["name"] = identity.name # Scopes - token_payload['scopes'] = self._generate_action_scopes(org, repo, actions, oid) + token_payload["scopes"] = self._generate_action_scopes(org, repo, actions, oid) # Custom lifetime if lifetime: - token_payload['exp'] = datetime.now(tz=UTC) + timedelta(seconds=lifetime) + token_payload["exp"] = datetime.now(tz=UTC) + timedelta(seconds=lifetime) return self._generate_token(**token_payload) @staticmethod - def _generate_action_scopes(org: str, repo: str, actions: Optional[Set[str]] = None, oid: Optional[str] = None) \ - -> str: - """Generate token scopes based on target object and actions - """ + def _generate_action_scopes( + org: str, + repo: str, + actions: Optional[Set[str]] = None, + oid: Optional[str] = None, + ) -> str: + """Generate token scopes based on target object and actions""" if oid is None: - oid = '*' - obj_id = f'{org}/{repo}/{oid}' - return str(Scope('obj', obj_id, actions)) + oid = "*" + obj_id = f"{org}/{repo}/{oid}" + return str(Scope("obj", obj_id, actions)) def _generate_token(self, **kwargs) -> str: - """Generate a JWT token that can be used later to authenticate a request - """ + """Generate a JWT token that can 
be used later to authenticate a request""" if not self.private_key: - raise ValueError("This authenticator is not configured to generate tokens; Set private_key to fix") + raise ValueError( + "This authenticator is not configured to generate tokens; Set private_key to fix" + ) payload: Dict[str, Any] = { "exp": datetime.now(tz=UTC) + timedelta(seconds=self.default_lifetime), "iat": datetime.now(tz=UTC), - "nbf": datetime.now(tz=UTC) + "nbf": datetime.now(tz=UTC), } payload.update(**kwargs) if self.issuer: - payload['iss'] = self.issuer + payload["iss"] = self.issuer if self.audience: - payload['aud'] = self.audience + payload["aud"] = self.audience headers = {} if self.key_id: - headers['kid'] = self.key_id + headers["kid"] = self.key_id - token = jwt.encode(payload, self.private_key, algorithm=self.algorithm, headers=headers) + token = jwt.encode( + payload, self.private_key, algorithm=self.algorithm, headers=headers + ) # Type of jwt.encode() went from bytes to str in jwt 2.x, but the # typing hints somehow aren't keeping up. This lets us do the # right thing with jwt 2.x. 
if isinstance(token, str): return token # type: ignore - return token.decode('ascii') + return token.decode("ascii") def _authenticate(self, request: Request): - """Authenticate a request - """ + """Authenticate a request""" token = self._get_token_from_headers(request) if token is None: token = self._get_token_from_qs(request) @@ -207,16 +227,23 @@ def _authenticate(self, request: Request): # Check if this is a JWT token, and if it has the expected key ID try: header = jwt.get_unverified_header(token) - if self.key_id and self.key_id != header.get('kid'): + if self.key_id and self.key_id != header.get("kid"): return None except jwt.PyJWTError: return None # We got a JWT token, now let's decode and verify it try: - return jwt.decode(token, key=self._get_verification_key(), algorithms=self.algorithm, leeway=self.leeway) + return jwt.decode( + token, + key=self._get_verification_key(), + algorithms=self.algorithm, + leeway=self.leeway, + ) except jwt.PyJWTError as e: - raise Unauthorized('Expired or otherwise invalid JWT token ({})'.format(str(e))) + raise Unauthorized( + "Expired or otherwise invalid JWT token ({})".format(str(e)) + ) def _get_token_from_headers(self, request: Request) -> Optional[str]: """Extract JWT token from HTTP Authorization header @@ -225,7 +252,7 @@ def _get_token_from_headers(self, request: Request) -> Optional[str]: and basic auth JWT payload has not been disabled, and the provided username matches the configured JWT token username, we will try to use the provided password as if it was a JWT token. 
""" - header = request.headers.get('Authorization') + header = request.headers.get("Authorization") if not header: return None @@ -234,10 +261,10 @@ def _get_token_from_headers(self, request: Request) -> Optional[str]: except ValueError: return None - if authz_type.lower() == 'bearer': + if authz_type.lower() == "bearer": self._log.debug("Found token in Authorization: Bearer header") return payload - elif authz_type.lower() == 'basic' and self.basic_auth_user: + elif authz_type.lower() == "basic" and self.basic_auth_user: parsed_header = Authorization.from_header(header) if parsed_header and parsed_header.username == self.basic_auth_user: self._log.debug("Found token in Authorization: Basic header") @@ -249,16 +276,17 @@ def _get_token_from_headers(self, request: Request) -> Optional[str]: @staticmethod def _get_token_from_qs(request: Request) -> Optional[str]: - """Get JWT token from the query string - """ - return request.args.get('jwt') + """Get JWT token from the query string""" + return request.args.get("jwt") def _get_identity(self, jwt_payload: Dict[str, Any]) -> Identity: - identity = DefaultIdentity(id=jwt_payload.get('sub'), - email=jwt_payload.get('email'), - name=jwt_payload.get('name', jwt_payload.get('sub'))) + identity = DefaultIdentity( + id=jwt_payload.get("sub"), + email=jwt_payload.get("email"), + name=jwt_payload.get("name", jwt_payload.get("sub")), + ) - scopes = to_iterable(jwt_payload.get('scopes', ())) + scopes = to_iterable(jwt_payload.get("scopes", ())) self._log.debug("Allowing scopes: %s", scopes) for scope in scopes: identity.allow(**self._parse_scope(scope)) @@ -266,10 +294,9 @@ def _get_identity(self, jwt_payload: Dict[str, Any]) -> Identity: return identity def _parse_scope(self, scope_str: str) -> Dict[str, Any]: - """Parse a scope string and convert it into arguments for Identity.allow() - """ + """Parse a scope string and convert it into arguments for Identity.allow()""" scope = Scope.from_string(scope_str) - if scope.entity_type 
!= 'obj': + if scope.entity_type != "obj": return {} organization = None @@ -277,7 +304,9 @@ def _parse_scope(self, scope_str: str) -> Dict[str, Any]: oid = None if scope.entity_ref is not None: - id_parts = [p if p != '*' else None for p in scope.entity_ref.split('/', maxsplit=2)] + id_parts = [ + p if p != "*" else None for p in scope.entity_ref.split("/", maxsplit=2) + ] if len(id_parts) == 3: organization, repo, oid = id_parts elif len(id_parts) == 2: @@ -287,18 +316,21 @@ def _parse_scope(self, scope_str: str) -> Dict[str, Any]: permissions = self._parse_scope_permissions(scope) - return {"organization": organization, - "repo": repo, - "permissions": permissions, - "oid": oid} + return { + "organization": organization, + "repo": repo, + "permissions": permissions, + "oid": oid, + } @staticmethod - def _parse_scope_permissions(scope: 'Scope') -> Set[Permission]: - """Extract granted permissions from scope object - """ - permissions_map = {'read': {Permission.READ, Permission.READ_META}, - 'write': {Permission.WRITE}, - 'verify': {Permission.READ_META}} + def _parse_scope_permissions(scope: "Scope") -> Set[Permission]: + """Extract granted permissions from scope object""" + permissions_map = { + "read": {Permission.READ, Permission.READ_META}, + "write": {Permission.WRITE}, + "verify": {Permission.READ_META}, + } permissions = set() if scope.actions: @@ -307,84 +339,91 @@ def _parse_scope_permissions(scope: 'Scope') -> Set[Permission]: else: permissions = Permission.all() - if scope.subscope in {'metadata', 'meta'}: + if scope.subscope in {"metadata", "meta"}: permissions = permissions.intersection({Permission.READ_META}) return permissions def _get_verification_key(self) -> Union[str, bytes]: - """Get the key used for token verification, based on algorithm - """ + """Get the key used for token verification, based on algorithm""" if self._verification_key is None: - if self.algorithm[0:2] == 'HS': + if self.algorithm[0:2] == "HS": self._verification_key = 
self.private_key else: self._verification_key = self.public_key if self._verification_key is None: - raise ValueError("No private or public key have been set, can't verify requests") + raise ValueError( + "No private or public key have been set, can't verify requests" + ) return self._verification_key class Scope(object): - """Scope object - """ + """Scope object""" entity_type = None subscope = None entity_ref = None actions = None - def __init__(self, entity_type: str, entity_id: Optional[str] = None, actions: Optional[Set[str]] = None, - subscope: Optional[str] = None): + def __init__( + self, + entity_type: str, + entity_id: Optional[str] = None, + actions: Optional[Set[str]] = None, + subscope: Optional[str] = None, + ): self.entity_type = entity_type self.entity_ref = entity_id self.actions = actions self.subscope = subscope def __repr__(self): - return ''.format(str(self)) + return "".format(str(self)) def __str__(self): - """Convert scope to a string - """ + """Convert scope to a string""" parts = [self.entity_type] - entity_ref = self.entity_ref if self.entity_ref != '*' else None - subscobe = self.subscope if self.subscope != '*' else None - actions = ','.join(sorted(self.actions)) if self.actions and self.actions != '*' else None + entity_ref = self.entity_ref if self.entity_ref != "*" else None + subscobe = self.subscope if self.subscope != "*" else None + actions = ( + ",".join(sorted(self.actions)) + if self.actions and self.actions != "*" + else None + ) if entity_ref: parts.append(entity_ref) elif subscobe or actions: - parts.append('*') + parts.append("*") if subscobe: parts.append(subscobe) if not actions: - parts.append('*') + parts.append("*") if actions: parts.append(actions) - return ':'.join(parts) + return ":".join(parts) @classmethod def from_string(cls, scope_str): - """Create a scope object from string - """ - parts = scope_str.split(':') + """Create a scope object from string""" + parts = scope_str.split(":") if len(parts) < 1: raise 
ValueError("Scope string should have at least 1 part") scope = cls(parts[0]) - if len(parts) > 1 and parts[1] != '*': + if len(parts) > 1 and parts[1] != "*": scope.entity_ref = parts[1] - if len(parts) == 3 and parts[2] != '*': + if len(parts) == 3 and parts[2] != "*": scope.actions = cls._parse_actions(parts[2]) if len(parts) == 4: - if parts[2] != '*': + if parts[2] != "*": scope.subscope = parts[2] - if parts[3] != '*': + if parts[3] != "*": scope.actions = cls._parse_actions(parts[3]) return scope @@ -393,12 +432,12 @@ def from_string(cls, scope_str): def _parse_actions(cls, actions_str: str) -> Set[str]: if not actions_str: return set() - return set(actions_str.split(',')) + return set(actions_str.split(",")) def factory(**options): - for key_type in ('private_key', 'public_key'): - file_opt = f'{key_type}_file' + for key_type in ("private_key", "public_key"): + file_opt = f"{key_type}_file" try: if options[file_opt]: with open(options[file_opt]) as f: diff --git a/giftless/config.py b/giftless/config.py index 2e6a622..29ae682 100644 --- a/giftless/config.py +++ b/giftless/config.py @@ -7,71 +7,69 @@ import yaml from dotenv import load_dotenv -ENV_PREFIX = 'GIFTLESS_' -ENV_FILE = '.env' +ENV_PREFIX = "GIFTLESS_" +ENV_FILE = ".env" default_transfer_config = { - "basic": figcan.Extensible({ - "factory": "giftless.transfer.basic_streaming:factory", - "options": figcan.Extensible({ - "storage_class": "giftless.storage.local_storage:LocalStorage", - "storage_options": figcan.Extensible({ - "path": "lfs-storage" - }), - "action_lifetime": 900, - }) - }), + "basic": figcan.Extensible( + { + "factory": "giftless.transfer.basic_streaming:factory", + "options": figcan.Extensible( + { + "storage_class": "giftless.storage.local_storage:LocalStorage", + "storage_options": figcan.Extensible({"path": "lfs-storage"}), + "action_lifetime": 900, + } + ), + } + ), } default_config = { "TRANSFER_ADAPTERS": figcan.Extensible(default_transfer_config), "TESTING": False, "DEBUG": 
False, - "AUTH_PROVIDERS": [ - 'giftless.auth.allow_anon:read_only' - ], + "AUTH_PROVIDERS": ["giftless.auth.allow_anon:read_only"], "PRE_AUTHORIZED_ACTION_PROVIDER": { - 'factory': 'giftless.auth.jwt:factory', - 'options': { - 'algorithm': 'HS256', - 'private_key': 'change-me', - 'private_key_file': None, - 'public_key': None, - 'public_key_file': None, - 'default_lifetime': 60, # 60 seconds for default actions - 'key_id': 'giftless-internal-jwt-key', - } + "factory": "giftless.auth.jwt:factory", + "options": { + "algorithm": "HS256", + "private_key": "change-me", + "private_key_file": None, + "public_key": None, + "public_key_file": None, + "default_lifetime": 60, # 60 seconds for default actions + "key_id": "giftless-internal-jwt-key", + }, }, - "MIDDLEWARE": [] + "MIDDLEWARE": [], } load_dotenv() def configure(app, additional_config: Optional[Dict] = None): - """Configure a Flask app using Figcan managed configuration object - """ + """Configure a Flask app using Figcan managed configuration object""" config = _compose_config(additional_config) app.config.update(config) return app def _compose_config(additional_config: Optional[Dict] = None) -> figcan.Configuration: - """Compose configuration object from all available sources - """ + """Compose configuration object from all available sources""" config = figcan.Configuration(default_config) environ = dict(os.environ) # Copy the environment as we're going to change it - if environ.get(f'{ENV_PREFIX}CONFIG_FILE'): - with open(environ[f'{ENV_PREFIX}CONFIG_FILE']) as f: + if environ.get(f"{ENV_PREFIX}CONFIG_FILE"): + with open(environ[f"{ENV_PREFIX}CONFIG_FILE"]) as f: config_from_file = yaml.safe_load(f) config.apply(config_from_file) - environ.pop(f'{ENV_PREFIX}CONFIG_FILE') + environ.pop(f"{ENV_PREFIX}CONFIG_FILE") - if environ.get(f'{ENV_PREFIX}CONFIG_STR'): - config_from_file = yaml.safe_load(environ[f'{ENV_PREFIX}CONFIG_STR']) + if environ.get(f"{ENV_PREFIX}CONFIG_STR"): + config_from_file = 
yaml.safe_load(environ[f"{ENV_PREFIX}CONFIG_STR"]) config.apply(config_from_file) - environ.pop(f'{ENV_PREFIX}CONFIG_STR') + environ.pop(f"{ENV_PREFIX}CONFIG_STR") config.apply_flat(environ, prefix=ENV_PREFIX) # type: ignore diff --git a/giftless/error_handling.py b/giftless/error_handling.py index 969cd08..ed99bc6 100644 --- a/giftless/error_handling.py +++ b/giftless/error_handling.py @@ -8,7 +8,6 @@ class ApiErrorHandler: - def __init__(self, app=None): if app: self.init_app(app) @@ -19,9 +18,8 @@ def init_app(self, app): @classmethod def error_as_json(cls, ex): - """Handle errors by returning a JSON response - """ - code = ex.code if hasattr(ex, 'code') else 500 + """Handle errors by returning a JSON response""" + code = ex.code if hasattr(ex, "code") else 500 data = {"message": str(ex)} return output_git_lfs_json(data=data, code=code) diff --git a/giftless/exc.py b/giftless/exc.py index 1c5c1c1..799e8d8 100644 --- a/giftless/exc.py +++ b/giftless/exc.py @@ -8,4 +8,4 @@ InvalidPayload = UnprocessableEntity -__all__ = ['NotFound', 'Forbidden', 'InvalidPayload'] +__all__ = ["NotFound", "Forbidden", "InvalidPayload"] diff --git a/giftless/representation.py b/giftless/representation.py index 2a05acd..1b89771 100644 --- a/giftless/representation.py +++ b/giftless/representation.py @@ -11,24 +11,24 @@ from flask import make_response -GIT_LFS_MIME_TYPE = 'application/vnd.git-lfs+json' +GIT_LFS_MIME_TYPE = "application/vnd.git-lfs+json" class CustomJsonEncoder(json.JSONEncoder): - """Custom JSON encoder that can support some additional required types - """ + """Custom JSON encoder that can support some additional required types""" + def default(self, o): if isinstance(o, datetime): return o.isoformat() return super().default(o) -def output_json(data, code, headers=None, content_type='application/json'): +def output_json(data, code, headers=None, content_type="application/json"): dumped = json.dumps(data, cls=CustomJsonEncoder) if headers: - 
headers.update({'Content-Type': content_type}) + headers.update({"Content-Type": content_type}) else: - headers = {'Content-Type': content_type} + headers = {"Content-Type": content_type} response = make_response(dumped, code, headers) return response diff --git a/giftless/schema.py b/giftless/schema.py index a0d9bff..0a22c7e 100644 --- a/giftless/schema.py +++ b/giftless/schema.py @@ -11,21 +11,21 @@ class Operation(Enum): - """Batch operations - """ - upload = 'upload' - download = 'download' + """Batch operations""" + + upload = "upload" + download = "download" class RefSchema(ma.Schema): # type: ignore - """ref field schema - """ + """ref field schema""" + name = fields.String(required=True) class ObjectSchema(ma.Schema): # type: ignore - """object field schema - """ + """object field schema""" + oid = fields.String(required=True) size = fields.Integer(required=True, validate=validate.Range(min=0)) @@ -36,19 +36,20 @@ def set_extra_fields(self, data, **_): extra = {} rest = {} for k, v in data.items(): - if k.startswith('x-'): + if k.startswith("x-"): extra[k[2:]] = v else: rest[k] = v - return {'extra': extra, **rest} + return {"extra": extra, **rest} class BatchRequest(ma.Schema): # type: ignore - operation = EnumField(Operation, required=True) - transfers = fields.List(fields.String, required=False, missing=['basic']) + transfers = fields.List(fields.String, required=False, missing=["basic"]) ref = fields.Nested(RefSchema, required=False) - objects = fields.Nested(ObjectSchema, validate=validate.Length(min=1), many=True, required=True) + objects = fields.Nested( + ObjectSchema, validate=validate.Length(min=1), many=True, required=True + ) batch_request_schema = BatchRequest(unknown=marshmallow.EXCLUDE) diff --git a/giftless/storage/__init__.py b/giftless/storage/__init__.py index d603ffd..d51a45d 100644 --- a/giftless/storage/__init__.py +++ b/giftless/storage/__init__.py @@ -10,6 +10,7 @@ class VerifiableStorage(ABC): All streaming backends should be 
'verifiable'. """ + @abstractmethod def verify_object(self, prefix: str, oid: str, size: int) -> bool: """Check that object exists and has the right size @@ -20,8 +21,8 @@ def verify_object(self, prefix: str, oid: str, size: int) -> bool: class StreamingStorage(VerifiableStorage, ABC): - """Interface for streaming storage adapters - """ + """Interface for streaming storage adapters""" + @abstractmethod def get(self, prefix: str, oid: str) -> Iterable[bytes]: pass @@ -42,8 +43,7 @@ def get_mime_type(self, prefix: str, oid: str) -> Optional[str]: return "application/octet-stream" def verify_object(self, prefix: str, oid: str, size: int): - """Verify that an object exists - """ + """Verify that an object exists""" try: return self.get_size(prefix, oid) == size except exc.ObjectNotFound: @@ -51,16 +51,28 @@ def verify_object(self, prefix: str, oid: str, size: int): class ExternalStorage(VerifiableStorage, ABC): - """Interface for streaming storage adapters - """ + """Interface for streaming storage adapters""" + @abstractmethod - def get_upload_action(self, prefix: str, oid: str, size: int, expires_in: int, - extra: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + def get_upload_action( + self, + prefix: str, + oid: str, + size: int, + expires_in: int, + extra: Optional[Dict[str, Any]] = None, + ) -> Dict[str, Any]: pass @abstractmethod - def get_download_action(self, prefix: str, oid: str, size: int, expires_in: int, - extra: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + def get_download_action( + self, + prefix: str, + oid: str, + size: int, + expires_in: int, + extra: Optional[Dict[str, Any]] = None, + ) -> Dict[str, Any]: pass @abstractmethod @@ -80,13 +92,26 @@ def verify_object(self, prefix: str, oid: str, size: int) -> bool: class MultipartStorage(VerifiableStorage, ABC): @abstractmethod - def get_multipart_actions(self, prefix: str, oid: str, size: int, part_size: int, expires_in: int, - extra: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + 
def get_multipart_actions( + self, + prefix: str, + oid: str, + size: int, + part_size: int, + expires_in: int, + extra: Optional[Dict[str, Any]] = None, + ) -> Dict[str, Any]: pass @abstractmethod - def get_download_action(self, prefix: str, oid: str, size: int, expires_in: int, - extra: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + def get_download_action( + self, + prefix: str, + oid: str, + size: int, + expires_in: int, + extra: Optional[Dict[str, Any]] = None, + ) -> Dict[str, Any]: pass @abstractmethod diff --git a/giftless/storage/amazon_s3.py b/giftless/storage/amazon_s3.py index cd9579a..687cd77 100644 --- a/giftless/storage/amazon_s3.py +++ b/giftless/storage/amazon_s3.py @@ -12,19 +12,24 @@ class AmazonS3Storage(StreamingStorage, ExternalStorage): - """AWS S3 Blob Storage backend. - """ - - def __init__(self, bucket_name: str, path_prefix: Optional[str] = None, endpoint: Optional[str] = None, **_): + """AWS S3 Blob Storage backend.""" + + def __init__( + self, + bucket_name: str, + path_prefix: Optional[str] = None, + endpoint: Optional[str] = None, + **_, + ): self.bucket_name = bucket_name self.path_prefix = path_prefix - self.s3 = boto3.resource('s3', endpoint_url=endpoint) - self.s3_client = boto3.client('s3', endpoint_url=endpoint) + self.s3 = boto3.resource("s3", endpoint_url=endpoint) + self.s3_client = boto3.client("s3", endpoint_url=endpoint) def get(self, prefix: str, oid: str) -> Iterable[bytes]: if not self.exists(prefix, oid): raise ObjectNotFound() - result: Iterable[bytes] = self._s3_object(prefix, oid).get()['Body'] + result: Iterable[bytes] = self._s3_object(prefix, oid).get()["Body"] return result def put(self, prefix: str, oid: str, data_stream: BinaryIO) -> int: @@ -34,7 +39,9 @@ def upload_callback(size): completed.append(size) bucket = self.s3.Bucket(self.bucket_name) - bucket.upload_fileobj(data_stream, self._get_blob_path(prefix, oid), Callback=upload_callback) + bucket.upload_fileobj( + data_stream, 
self._get_blob_path(prefix, oid), Callback=upload_callback + ) return sum(completed) def exists(self, prefix: str, oid: str) -> bool: @@ -48,26 +55,30 @@ def get_size(self, prefix: str, oid: str) -> int: try: result: int = self._s3_object(prefix, oid).content_length except botocore.exceptions.ClientError as e: - if e.response['Error']['Code'] == "404": + if e.response["Error"]["Code"] == "404": raise ObjectNotFound() else: raise e return result - def get_upload_action(self, prefix: str, oid: str, size: int, expires_in: int, - extra: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: - - base64_oid = base64.b64encode(binascii.a2b_hex(oid)).decode('ascii') + def get_upload_action( + self, + prefix: str, + oid: str, + size: int, + expires_in: int, + extra: Optional[Dict[str, Any]] = None, + ) -> Dict[str, Any]: + base64_oid = base64.b64encode(binascii.a2b_hex(oid)).decode("ascii") params = { - 'Bucket': self.bucket_name, - 'Key': self._get_blob_path(prefix, oid), - 'ContentType': 'application/octet-stream', - 'ChecksumSHA256': base64_oid, + "Bucket": self.bucket_name, + "Key": self._get_blob_path(prefix, oid), + "ContentType": "application/octet-stream", + "ChecksumSHA256": base64_oid, } - response = self.s3_client.generate_presigned_url('put_object', - Params=params, - ExpiresIn=expires_in - ) + response = self.s3_client.generate_presigned_url( + "put_object", Params=params, ExpiresIn=expires_in + ) return { "actions": { "upload": { @@ -76,48 +87,44 @@ def get_upload_action(self, prefix: str, oid: str, size: int, expires_in: int, "Content-Type": "application/octet-stream", "x-amz-checksum-sha256": base64_oid, }, - "expires_in": expires_in + "expires_in": expires_in, } } } - def get_download_action(self, prefix: str, oid: str, size: int, expires_in: int, - extra: Optional[Dict[str, str]] = None) -> Dict[str, Any]: + def get_download_action( + self, + prefix: str, + oid: str, + size: int, + expires_in: int, + extra: Optional[Dict[str, str]] = None, + ) -> Dict[str, 
Any]: + params = {"Bucket": self.bucket_name, "Key": self._get_blob_path(prefix, oid)} - params = { - 'Bucket': self.bucket_name, - 'Key': self._get_blob_path(prefix, oid) - } - - filename = extra.get('filename') if extra else None - disposition = extra.get('disposition', 'attachment') if extra else 'attachment' + filename = extra.get("filename") if extra else None + disposition = extra.get("disposition", "attachment") if extra else "attachment" if filename and disposition: filename = safe_filename(filename) - params['ResponseContentDisposition'] = f'attachment; filename="{filename}"' + params["ResponseContentDisposition"] = f'attachment; filename="{filename}"' elif disposition: - params['ResponseContentDisposition'] = disposition + params["ResponseContentDisposition"] = disposition - response = self.s3_client.generate_presigned_url('get_object', - Params=params, - ExpiresIn=expires_in - ) + response = self.s3_client.generate_presigned_url( + "get_object", Params=params, ExpiresIn=expires_in + ) return { "actions": { - "download": { - "href": response, - "header": {}, - "expires_in": expires_in - } + "download": {"href": response, "header": {}, "expires_in": expires_in} } } def _get_blob_path(self, prefix: str, oid: str) -> str: - """Get the path to a blob in storage - """ + """Get the path to a blob in storage""" if not self.path_prefix: - storage_prefix = '' - elif self.path_prefix[0] == '/': + storage_prefix = "" + elif self.path_prefix[0] == "/": storage_prefix = self.path_prefix[1:] else: storage_prefix = self.path_prefix diff --git a/giftless/storage/azure.py b/giftless/storage/azure.py index 897153a..3cbda07 100644 --- a/giftless/storage/azure.py +++ b/giftless/storage/azure.py @@ -8,13 +8,23 @@ from xml.sax.saxutils import escape as xml_escape from azure.core.exceptions import ResourceNotFoundError -from azure.storage.blob import BlobClient, BlobSasPermissions, BlobServiceClient, generate_blob_sas # type: ignore - -from giftless.storage import 
ExternalStorage, MultipartStorage, StreamingStorage, guess_mime_type_from_filename +from azure.storage.blob import ( + BlobClient, + BlobSasPermissions, + BlobServiceClient, + generate_blob_sas, +) # type: ignore + +from giftless.storage import ( + ExternalStorage, + MultipartStorage, + StreamingStorage, + guess_mime_type_from_filename, +) from .exc import ObjectNotFound -Block = namedtuple('Block', ['id', 'start', 'size']) +Block = namedtuple("Block", ["id", "start", "size"]) _log = logging.getLogger(__name__) @@ -26,24 +36,34 @@ class AzureBlobsStorage(StreamingStorage, ExternalStorage, MultipartStorage): _PART_ID_BYTE_SIZE = 16 - def __init__(self, connection_string: str, container_name: str, path_prefix: Optional[str] = None, - enable_content_digest: bool = True, **_): + def __init__( + self, + connection_string: str, + container_name: str, + path_prefix: Optional[str] = None, + enable_content_digest: bool = True, + **_, + ): self.container_name = container_name self.path_prefix = path_prefix - self.blob_svc_client: BlobServiceClient = BlobServiceClient.from_connection_string(connection_string) + self.blob_svc_client: BlobServiceClient = ( + BlobServiceClient.from_connection_string(connection_string) + ) self.enable_content_digest = enable_content_digest def get(self, prefix: str, oid: str) -> Iterable[bytes]: - blob_client = self.blob_svc_client.get_blob_client(container=self.container_name, - blob=self._get_blob_path(prefix, oid)) + blob_client = self.blob_svc_client.get_blob_client( + container=self.container_name, blob=self._get_blob_path(prefix, oid) + ) try: return blob_client.download_blob().chunks() # type: ignore except ResourceNotFoundError: raise ObjectNotFound("Object does not exist") def put(self, prefix: str, oid: str, data_stream: IO[bytes]) -> int: - blob_client = self.blob_svc_client.get_blob_client(container=self.container_name, - blob=self._get_blob_path(prefix, oid)) + blob_client = self.blob_svc_client.get_blob_client( + 
container=self.container_name, blob=self._get_blob_path(prefix, oid) + ) blob_client.upload_blob(data_stream) # type: ignore return data_stream.tell() @@ -56,8 +76,9 @@ def exists(self, prefix: str, oid: str) -> bool: def get_size(self, prefix: str, oid: str) -> int: try: - blob_client = self.blob_svc_client.get_blob_client(container=self.container_name, - blob=self._get_blob_path(prefix, oid)) + blob_client = self.blob_svc_client.get_blob_client( + container=self.container_name, blob=self._get_blob_path(prefix, oid) + ) props = blob_client.get_blob_properties() return props.size # type: ignore except ResourceNotFoundError: @@ -65,26 +86,36 @@ def get_size(self, prefix: str, oid: str) -> int: def get_mime_type(self, prefix: str, oid: str) -> Optional[str]: try: - blob_client = self.blob_svc_client.get_blob_client(container=self.container_name, - blob=self._get_blob_path(prefix, oid)) + blob_client = self.blob_svc_client.get_blob_client( + container=self.container_name, blob=self._get_blob_path(prefix, oid) + ) props = blob_client.get_blob_properties() mime_type = props.content_settings.get( - "content_type", "application/octet-stream") + "content_type", "application/octet-stream" + ) return mime_type # type: ignore except ResourceNotFoundError: raise ObjectNotFound("Object does not exist") - def get_upload_action(self, prefix: str, oid: str, size: int, expires_in: int, - extra: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: - filename = extra.get('filename') if extra else None + def get_upload_action( + self, + prefix: str, + oid: str, + size: int, + expires_in: int, + extra: Optional[Dict[str, Any]] = None, + ) -> Dict[str, Any]: + filename = extra.get("filename") if extra else None headers = { "x-ms-blob-type": "BlockBlob", } reply = { "actions": { "upload": { - "href": self._get_signed_url(prefix, oid, expires_in, filename, create=True), - "expires_in": expires_in + "href": self._get_signed_url( + prefix, oid, expires_in, filename, create=True + ), + 
"expires_in": expires_in, } } } @@ -98,33 +129,61 @@ def get_upload_action(self, prefix: str, oid: str, size: int, expires_in: int, return reply - def get_download_action(self, prefix: str, oid: str, size: int, expires_in: int, - extra: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: - filename = extra.get('filename') if extra else None - disposition = extra.get('disposition', 'attachment') if extra else 'attachment' + def get_download_action( + self, + prefix: str, + oid: str, + size: int, + expires_in: int, + extra: Optional[Dict[str, Any]] = None, + ) -> Dict[str, Any]: + filename = extra.get("filename") if extra else None + disposition = extra.get("disposition", "attachment") if extra else "attachment" return { "actions": { "download": { - "href": self._get_signed_url(prefix, oid, expires_in, filename, disposition=disposition, read=True), + "href": self._get_signed_url( + prefix, + oid, + expires_in, + filename, + disposition=disposition, + read=True, + ), "header": {}, - "expires_in": expires_in + "expires_in": expires_in, } } } - def get_multipart_actions(self, prefix: str, oid: str, size: int, part_size: int, expires_in: int, - extra: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: - """Get actions for a multipart upload - """ + def get_multipart_actions( + self, + prefix: str, + oid: str, + size: int, + part_size: int, + expires_in: int, + extra: Optional[Dict[str, Any]] = None, + ) -> Dict[str, Any]: + """Get actions for a multipart upload""" blocks = _calculate_blocks(size, part_size) uncommitted = self._get_uncommitted_blocks(prefix, oid, blocks) - filename = extra.get('filename') if extra else None - base_url = self._get_signed_url(prefix, oid, expires_in, filename, create=True, write=True, delete=True) - parts = [self._create_part_request(base_url, b, expires_in) for b in blocks if b.id not in uncommitted] - _log.info("There are %d uncommitted blocks pre-uploaded; %d parts still need to be uploaded", - len(uncommitted), len(parts)) + filename = 
extra.get("filename") if extra else None + base_url = self._get_signed_url( + prefix, oid, expires_in, filename, create=True, write=True, delete=True + ) + parts = [ + self._create_part_request(base_url, b, expires_in) + for b in blocks + if b.id not in uncommitted + ] + _log.info( + "There are %d uncommitted blocks pre-uploaded; %d parts still need to be uploaded", + len(uncommitted), + len(parts), + ) commit_body = self._create_commit_body(blocks) reply: Dict[str, Any] = { "actions": { @@ -132,109 +191,134 @@ def get_multipart_actions(self, prefix: str, oid: str, size: int, part_size: int "method": "PUT", "href": f"{base_url}&{urlencode({'comp': 'blocklist'})}", "body": commit_body, - "header": { - "Content-type": "text/xml; charset=utf8" - }, - "expires_in": expires_in + "header": {"Content-type": "text/xml; charset=utf8"}, + "expires_in": expires_in, }, "abort": { "method": "DELETE", "href": base_url, - "expires_in": expires_in - } + "expires_in": expires_in, + }, } } if filename: mime_type = guess_mime_type_from_filename(filename) if mime_type: - reply["actions"]["commit"]["header"]["x-ms-blob-content-type"] = mime_type + reply["actions"]["commit"]["header"][ + "x-ms-blob-content-type" + ] = mime_type if parts: - reply['actions']['parts'] = parts + reply["actions"]["parts"] = parts return reply def _get_blob_path(self, prefix: str, oid: str) -> str: - """Get the path to a blob in storage - """ + """Get the path to a blob in storage""" if not self.path_prefix: - storage_prefix = '' - elif self.path_prefix[0] == '/': + storage_prefix = "" + elif self.path_prefix[0] == "/": storage_prefix = self.path_prefix[1:] else: storage_prefix = self.path_prefix return posixpath.join(storage_prefix, prefix, oid) - def _get_signed_url(self, prefix: str, oid: str, expires_in: int, filename: Optional[str] = None, - disposition: Optional[str] = None, **permissions: bool) -> str: + def _get_signed_url( + self, + prefix: str, + oid: str, + expires_in: int, + filename: 
Optional[str] = None, + disposition: Optional[str] = None, + **permissions: bool, + ) -> str: blob_name = self._get_blob_path(prefix, oid) blob_permissions = BlobSasPermissions(**permissions) - token_expires = (datetime.now(tz=timezone.utc) + timedelta(seconds=expires_in)) + token_expires = datetime.now(tz=timezone.utc) + timedelta(seconds=expires_in) extra_args: Dict[str, Any] = {} if filename and disposition: - extra_args['content_disposition'] = f'{disposition}; filename="{filename}"' + extra_args["content_disposition"] = f'{disposition}; filename="{filename}"' elif disposition: - extra_args['content_disposition'] = f'{disposition};"' - - sas_token = generate_blob_sas(account_name=self.blob_svc_client.account_name, - account_key=self.blob_svc_client.credential.account_key, - container_name=self.container_name, - blob_name=blob_name, - permission=blob_permissions, - expiry=token_expires, - **extra_args) - - blob_client = BlobClient(self.blob_svc_client.url, container_name=self.container_name, blob_name=blob_name, - credential=sas_token) + extra_args["content_disposition"] = f'{disposition};"' + + sas_token = generate_blob_sas( + account_name=self.blob_svc_client.account_name, + account_key=self.blob_svc_client.credential.account_key, + container_name=self.container_name, + blob_name=blob_name, + permission=blob_permissions, + expiry=token_expires, + **extra_args, + ) + + blob_client = BlobClient( + self.blob_svc_client.url, + container_name=self.container_name, + blob_name=blob_name, + credential=sas_token, + ) return blob_client.url # type: ignore - def _get_uncommitted_blocks(self, prefix: str, oid: str, blocks: List[Block]) -> Dict[int, int]: - """Get list of uncommitted blocks from the server - """ - blob_client = self.blob_svc_client.get_blob_client(container=self.container_name, - blob=self._get_blob_path(prefix, oid)) + def _get_uncommitted_blocks( + self, prefix: str, oid: str, blocks: List[Block] + ) -> Dict[int, int]: + """Get list of uncommitted blocks 
from the server""" + blob_client = self.blob_svc_client.get_blob_client( + container=self.container_name, blob=self._get_blob_path(prefix, oid) + ) try: - committed_blocks, uncommitted_blocks = blob_client.get_block_list(block_list_type='all') + committed_blocks, uncommitted_blocks = blob_client.get_block_list( + block_list_type="all" + ) except ResourceNotFoundError: return {} if committed_blocks: - _log.warning(f"Committed blocks found for {oid}, this is unexpected state; restarting upload") + _log.warning( + f"Committed blocks found for {oid}, this is unexpected state; restarting upload" + ) blob_client.delete_blob() return {} try: # NOTE: The Azure python library already does ID base64 decoding for us, so we only case to int here - existing_blocks = {int(b['id']): b['size'] for b in uncommitted_blocks} + existing_blocks = {int(b["id"]): b["size"] for b in uncommitted_blocks} except ValueError: - _log.warning("Some uncommitted blocks have unexpected ID format; restarting upload") + _log.warning( + "Some uncommitted blocks have unexpected ID format; restarting upload" + ) return {} - _log.debug("Found %d existing uncommitted blocks on server", len(existing_blocks)) + _log.debug( + "Found %d existing uncommitted blocks on server", len(existing_blocks) + ) # Verify that existing blocks are the same as what we plan to upload for block in blocks: if block.id in existing_blocks and existing_blocks[block.id] != block.size: - _log.warning("Uncommitted block size does not match our plan, restating upload") + _log.warning( + "Uncommitted block size does not match our plan, restating upload" + ) blob_client.delete_blob() return {} return existing_blocks - def _create_part_request(self, base_url: str, block: Block, expires_in: int) -> Dict[str, Any]: - """Create the part request object for a block - """ + def _create_part_request( + self, base_url: str, block: Block, expires_in: int + ) -> Dict[str, Any]: + """Create the part request object for a block""" block_id = 
self._encode_block_id(block.id) part = { - "href": f'{base_url}&comp=block&blockid={block_id}', + "href": f"{base_url}&comp=block&blockid={block_id}", "pos": block.start, "size": block.size, "expires_in": expires_in, } if self.enable_content_digest: - part['want_digest'] = 'contentMD5' + part["want_digest"] = "contentMD5" return part @@ -245,14 +329,22 @@ def _create_commit_body(self, blocks: List[Block]) -> str: here. If this ever gets complex, it may be a good idea to rely on lxml or similar. """ return '{}'.format( - ''.join(['{}'.format(xml_escape(self._encode_block_id(b.id))) for b in blocks]) + "".join( + [ + "{}".format( + xml_escape(self._encode_block_id(b.id)) + ) + for b in blocks + ] + ) ) @classmethod def _encode_block_id(cls, b_id: int) -> str: - """Encode a block ID in the manner expected by the Azure API - """ - return base64.b64encode(str(b_id).zfill(cls._PART_ID_BYTE_SIZE).encode('ascii')).decode('ascii') + """Encode a block ID in the manner expected by the Azure API""" + return base64.b64encode( + str(b_id).zfill(cls._PART_ID_BYTE_SIZE).encode("ascii") + ).decode("ascii") def _calculate_blocks(file_size: int, part_size: int) -> List[Block]: @@ -272,9 +364,13 @@ def _calculate_blocks(file_size: int, part_size: int) -> List[Block]: """ full_blocks = file_size // part_size last_block_size = file_size % part_size - blocks = [Block(id=i, start=i * part_size, size=part_size) for i in range(full_blocks)] + blocks = [ + Block(id=i, start=i * part_size, size=part_size) for i in range(full_blocks) + ] if last_block_size: - blocks.append(Block(id=full_blocks, start=full_blocks * part_size, size=last_block_size)) + blocks.append( + Block(id=full_blocks, start=full_blocks * part_size, size=last_block_size) + ) return blocks diff --git a/giftless/storage/exc.py b/giftless/storage/exc.py index c85f92b..eca1453 100644 --- a/giftless/storage/exc.py +++ b/giftless/storage/exc.py @@ -4,13 +4,12 @@ class StorageError(RuntimeError): - """Base class for storage errors 
- """ + """Base class for storage errors""" + code: Optional[int] = None def as_dict(self): - return {"message": str(self), - "code": self.code} + return {"message": str(self), "code": self.code} class ObjectNotFound(StorageError): diff --git a/giftless/storage/google_cloud.py b/giftless/storage/google_cloud.py index 09587e9..069eb0b 100644 --- a/giftless/storage/google_cloud.py +++ b/giftless/storage/google_cloud.py @@ -20,22 +20,24 @@ class GoogleCloudStorage(StreamingStorage, ExternalStorage): transfers. """ - def __init__(self, - project_name: str, - bucket_name: str, - account_key_file: Optional[str] = None, - account_key_base64: Optional[str] = None, - path_prefix: Optional[str] = None, - serviceaccount_email: Optional[str] = None, - **_): + def __init__( + self, + project_name: str, + bucket_name: str, + account_key_file: Optional[str] = None, + account_key_base64: Optional[str] = None, + path_prefix: Optional[str] = None, + serviceaccount_email: Optional[str] = None, + **_, + ): self.bucket_name = bucket_name self.path_prefix = path_prefix - self.credentials: Optional[Union[ - service_account.Credentials, impersonated_credentials.Credentials - ]] = self._load_credentials( - account_key_file, account_key_base64 + self.credentials: Optional[ + Union[service_account.Credentials, impersonated_credentials.Credentials] + ] = self._load_credentials(account_key_file, account_key_base64) + self.storage_client = storage.Client( + project=project_name, credentials=self.credentials ) - self.storage_client = storage.Client(project=project_name, credentials=self.credentials) if not self.credentials: if not serviceaccount_email: raise ValueError( @@ -48,7 +50,7 @@ def get(self, prefix: str, oid: str) -> BinaryIO: bucket = self.storage_client.bucket(self.bucket_name) blob = bucket.get_blob(self._get_blob_path(prefix, oid)) if blob is None: - raise ObjectNotFound('Object does not exist') + raise ObjectNotFound("Object does not exist") stream = io.BytesIO() 
blob.download_to_file(stream) stream.seek(0) @@ -72,69 +74,103 @@ def get_size(self, prefix: str, oid: str) -> int: raise ObjectNotFound("Object does not exist") return blob.size # type: ignore - def get_upload_action(self, prefix: str, oid: str, size: int, expires_in: int, - extra: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + def get_upload_action( + self, + prefix: str, + oid: str, + size: int, + expires_in: int, + extra: Optional[Dict[str, Any]] = None, + ) -> Dict[str, Any]: return { "actions": { "upload": { - "href": self._get_signed_url(prefix, oid, http_method='PUT', expires_in=expires_in), + "href": self._get_signed_url( + prefix, oid, http_method="PUT", expires_in=expires_in + ), "header": {}, - "expires_in": expires_in + "expires_in": expires_in, } } } - def get_download_action(self, prefix: str, oid: str, size: int, expires_in: int, - extra: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: - filename = extra.get('filename') if extra else None - disposition = extra.get('disposition', 'attachment') if extra else 'attachment' + def get_download_action( + self, + prefix: str, + oid: str, + size: int, + expires_in: int, + extra: Optional[Dict[str, Any]] = None, + ) -> Dict[str, Any]: + filename = extra.get("filename") if extra else None + disposition = extra.get("disposition", "attachment") if extra else "attachment" return { "actions": { "download": { "href": self._get_signed_url( - prefix, oid, expires_in=expires_in, filename=filename, disposition=disposition), + prefix, + oid, + expires_in=expires_in, + filename=filename, + disposition=disposition, + ), "header": {}, - "expires_in": expires_in + "expires_in": expires_in, } } } def _get_blob_path(self, prefix: str, oid: str) -> str: - """Get the path to a blob in storage - """ + """Get the path to a blob in storage""" if not self.path_prefix: - storage_prefix = '' - elif self.path_prefix[0] == '/': + storage_prefix = "" + elif self.path_prefix[0] == "/": storage_prefix = self.path_prefix[1:] 
else: storage_prefix = self.path_prefix return posixpath.join(storage_prefix, prefix, oid) - def _get_signed_url(self, prefix: str, oid: str, expires_in: int, http_method: str = 'GET', - filename: Optional[str] = None, disposition: Optional[str] = None) -> str: + def _get_signed_url( + self, + prefix: str, + oid: str, + expires_in: int, + http_method: str = "GET", + filename: Optional[str] = None, + disposition: Optional[str] = None, + ) -> str: creds = self.credentials if creds is None: creds = self._get_workload_identity_credentials(expires_in) bucket = self.storage_client.bucket(self.bucket_name) blob = bucket.blob(self._get_blob_path(prefix, oid)) - disposition = f'attachment; filename={filename}' if filename else None + disposition = f"attachment; filename={filename}" if filename else None if filename and disposition: disposition = f'{disposition}; filename="{filename}"' - url: str = blob.generate_signed_url(expiration=timedelta(seconds=expires_in), method=http_method, version='v4', - response_disposition=disposition, credentials=creds) + url: str = blob.generate_signed_url( + expiration=timedelta(seconds=expires_in), + method=http_method, + version="v4", + response_disposition=disposition, + credentials=creds, + ) return url @staticmethod - def _load_credentials(account_key_file: Optional[str], account_key_base64: Optional[str]) \ - -> Optional[service_account.Credentials]: - """Load Google Cloud credentials from passed configuration - """ + def _load_credentials( + account_key_file: Optional[str], account_key_base64: Optional[str] + ) -> Optional[service_account.Credentials]: + """Load Google Cloud credentials from passed configuration""" if account_key_file and account_key_base64: - raise ValueError('Provide either account_key_file or account_key_base64 but not both') + raise ValueError( + "Provide either account_key_file or account_key_base64 but not both" + ) elif account_key_file: - return 
service_account.Credentials.from_service_account_file(account_key_file) + return service_account.Credentials.from_service_account_file( + account_key_file + ) elif account_key_base64: account_info = json.loads(base64.b64decode(account_key_base64)) return service_account.Credentials.from_service_account_info(account_info) @@ -156,7 +192,7 @@ def _get_workload_identity_credentials( target_principal=email, target_scopes=( "https://www.googleapis.com/auth/devstorage.read_only", - "https://www.googleapis.com/auth/devstorage.read_write" + "https://www.googleapis.com/auth/devstorage.read_write", ), - lifetime=lifetime + lifetime=lifetime, ) diff --git a/giftless/storage/local_storage.py b/giftless/storage/local_storage.py index 2dda4f5..5b0ec82 100644 --- a/giftless/storage/local_storage.py +++ b/giftless/storage/local_storage.py @@ -13,16 +13,17 @@ class LocalStorage(StreamingStorage, MultipartStorage, ViewProvider): While it can be used in production, large scale deployment will most likely want to use a more scalable solution such as one of the cloud storage backends. 
""" + def __init__(self, path: Optional[str] = None, **_) -> None: if path is None: - path = 'lfs-storage' + path = "lfs-storage" self.path = path self._create_path(self.path) def get(self, prefix: str, oid: str) -> BinaryIO: path = self._get_path(prefix, oid) if os.path.isfile(path): - return open(path, 'br') + return open(path, "br") else: raise exc.ObjectNotFound("Object was not found") @@ -30,7 +31,7 @@ def put(self, prefix: str, oid: str, data_stream: BinaryIO) -> int: path = self._get_path(prefix, oid) directory = os.path.dirname(path) self._create_path(directory) - with open(path, 'bw') as dest: + with open(path, "bw") as dest: shutil.copyfileobj(data_stream, dest) return dest.tell() @@ -47,12 +48,25 @@ def get_mime_type(self, prefix: str, oid: str) -> str: return "application/octet-stream" raise exc.ObjectNotFound("Object was not found") - def get_multipart_actions(self, prefix: str, oid: str, size: int, part_size: int, expires_in: int, - extra: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + def get_multipart_actions( + self, + prefix: str, + oid: str, + size: int, + part_size: int, + expires_in: int, + extra: Optional[Dict[str, Any]] = None, + ) -> Dict[str, Any]: return {} - def get_download_action(self, prefix: str, oid: str, size: int, expires_in: int, - extra: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + def get_download_action( + self, + prefix: str, + oid: str, + size: int, + expires_in: int, + extra: Optional[Dict[str, Any]] = None, + ) -> Dict[str, Any]: return {} def register_views(self, app): diff --git a/giftless/transfer/__init__.py b/giftless/transfer/__init__.py index d64dc6d..5e37925 100644 --- a/giftless/transfer/__init__.py +++ b/giftless/transfer/__init__.py @@ -11,29 +11,41 @@ from giftless.util import add_query_params, get_callable from giftless.view import ViewProvider -_registered_adapters: Dict[str, 'TransferAdapter'] = {} +_registered_adapters: Dict[str, "TransferAdapter"] = {} class TransferAdapter(ABC): - """A 
transfer adapter tells Git LFS Server how to respond to batch API requests - """ - def upload(self, organization: str, repo: str, oid: str, size: int, - extra: Optional[Dict[str, Any]] = None) -> Dict: + """A transfer adapter tells Git LFS Server how to respond to batch API requests""" + + def upload( + self, + organization: str, + repo: str, + oid: str, + size: int, + extra: Optional[Dict[str, Any]] = None, + ) -> Dict: raise NotImplementedError("This transfer adapter is not fully implemented") - def download(self, organization: str, repo: str, oid: str, size: int, - extra: Optional[Dict[str, Any]] = None) -> Dict: + def download( + self, + organization: str, + repo: str, + oid: str, + size: int, + extra: Optional[Dict[str, Any]] = None, + ) -> Dict: raise NotImplementedError("This transfer adapter is not fully implemented") - def get_action(self, name: str, organization: str, repo: str) -> Callable[[str, int], Dict]: - """Shortcut for quickly getting an action callable for transfer adapter objects - """ + def get_action( + self, name: str, organization: str, repo: str + ) -> Callable[[str, int], Dict]: + """Shortcut for quickly getting an action callable for transfer adapter objects""" return partial(getattr(self, name), organization=organization, repo=repo) class PreAuthorizingTransferAdapter(TransferAdapter, ABC): - """A transfer adapter that can pre-authohrize one or more of the actions it supports - """ + """A transfer adapter that can pre-authohrize one or more of the actions it supports""" # Lifetime of verify tokens can be very long VERIFY_LIFETIME = 3600 * 12 @@ -43,8 +55,15 @@ class PreAuthorizingTransferAdapter(TransferAdapter, ABC): def set_auth_module(self, auth_module: Authentication): self._auth_module = auth_module - def _preauth_url(self, original_url: str, org: str, repo: str, actions: Optional[Set[str]] = None, - oid: Optional[str] = None, lifetime: Optional[int] = None) -> str: + def _preauth_url( + self, + original_url: str, + org: str, + 
repo: str, + actions: Optional[Set[str]] = None, + oid: Optional[str] = None, + lifetime: Optional[int] = None, + ) -> str: if not (self._auth_module and self._auth_module.preauth_handler): return original_url @@ -52,13 +71,20 @@ def _preauth_url(self, original_url: str, org: str, repo: str, actions: Optional if identity is None: return original_url - params = self._auth_module.preauth_handler.get_authz_query_params(identity, org, repo, actions, oid, - lifetime=lifetime) + params = self._auth_module.preauth_handler.get_authz_query_params( + identity, org, repo, actions, oid, lifetime=lifetime + ) return add_query_params(original_url, params) - def _preauth_headers(self, org: str, repo: str, actions: Optional[Set[str]] = None, - oid: Optional[str] = None, lifetime: Optional[int] = None) -> Dict[str, str]: + def _preauth_headers( + self, + org: str, + repo: str, + actions: Optional[Set[str]] = None, + oid: Optional[str] = None, + lifetime: Optional[int] = None, + ) -> Dict[str, str]: if not (self._auth_module and self._auth_module.preauth_handler): return {} @@ -66,7 +92,9 @@ def _preauth_headers(self, org: str, repo: str, actions: Optional[Set[str]] = No if identity is None: return {} - return self._auth_module.preauth_handler.get_authz_header(identity, org, repo, actions, oid, lifetime=lifetime) + return self._auth_module.preauth_handler.get_authz_header( + identity, org, repo, actions, oid, lifetime=lifetime + ) def init_flask_app(app): @@ -76,18 +104,19 @@ def init_flask_app(app): - Instantiate all transfer adapters defined in config - Register any Flask views provided by these adapters """ - config = app.config.get('TRANSFER_ADAPTERS', {}) + config = app.config.get("TRANSFER_ADAPTERS", {}) adapters = {k: _init_adapter(v) for k, v in config.items()} for k, adapter in adapters.items(): register_adapter(k, adapter) - for adapter in (a for a in _registered_adapters.values() if isinstance(a, ViewProvider)): + for adapter in ( + a for a in 
_registered_adapters.values() if isinstance(a, ViewProvider) + ): adapter.register_views(app) def register_adapter(key: str, adapter: TransferAdapter): - """Register a transfer adapter - """ + """Register a transfer adapter""" _registered_adapters[key] = adapter @@ -99,10 +128,9 @@ def match_transfer_adapter(transfers: List[str]) -> Tuple[str, TransferAdapter]: def _init_adapter(config: Dict) -> TransferAdapter: - """Call adapter factory to create a transfer adapter instance - """ - factory: Callable[..., TransferAdapter] = get_callable(config['factory']) - adapter: TransferAdapter = factory(**config.get('options', {})) + """Call adapter factory to create a transfer adapter instance""" + factory: Callable[..., TransferAdapter] = get_callable(config["factory"]) + adapter: TransferAdapter = factory(**config.get("options", {})) if isinstance(adapter, PreAuthorizingTransferAdapter): adapter.set_auth_module(authentication) return adapter diff --git a/giftless/transfer/basic_external.py b/giftless/transfer/basic_external.py index 0996d55..748b057 100644 --- a/giftless/transfer/basic_external.py +++ b/giftless/transfer/basic_external.py @@ -21,47 +21,70 @@ class BasicExternalBackendTransferAdapter(PreAuthorizingTransferAdapter, ViewProvider): - def __init__(self, storage: ExternalStorage, default_action_lifetime: int): self.storage = storage self.action_lifetime = default_action_lifetime - def upload(self, organization: str, repo: str, oid: str, size: int, extra: Optional[Dict[str, Any]] = None) -> Dict: + def upload( + self, + organization: str, + repo: str, + oid: str, + size: int, + extra: Optional[Dict[str, Any]] = None, + ) -> Dict: prefix = posixpath.join(organization, repo) - response = {"oid": oid, - "size": size} + response = {"oid": oid, "size": size} if self.storage.verify_object(prefix, oid, size): # No upload required, we already have this object return response - response.update(self.storage.get_upload_action(prefix, oid, size, self.action_lifetime, extra)) 
- if response.get('actions', {}).get('upload'): # type: ignore - response['authenticated'] = True - headers = self._preauth_headers(organization, repo, actions={'verify'}, oid=oid, - lifetime=self.VERIFY_LIFETIME) - response['actions']['verify'] = { # type: ignore + response.update( + self.storage.get_upload_action( + prefix, oid, size, self.action_lifetime, extra + ) + ) + if response.get("actions", {}).get("upload"): # type: ignore + response["authenticated"] = True + headers = self._preauth_headers( + organization, + repo, + actions={"verify"}, + oid=oid, + lifetime=self.VERIFY_LIFETIME, + ) + response["actions"]["verify"] = { # type: ignore "href": VerifyView.get_verify_url(organization, repo), "header": headers, - "expires_in": self.VERIFY_LIFETIME + "expires_in": self.VERIFY_LIFETIME, } return response - def download(self, organization: str, repo: str, oid: str, size: int, - extra: Optional[Dict[str, Any]] = None) -> Dict: + def download( + self, + organization: str, + repo: str, + oid: str, + size: int, + extra: Optional[Dict[str, Any]] = None, + ) -> Dict: prefix = posixpath.join(organization, repo) - response = {"oid": oid, - "size": size} + response = {"oid": oid, "size": size} try: self._check_object(prefix, oid, size) - response.update(self.storage.get_download_action(prefix, oid, size, self.action_lifetime, extra)) + response.update( + self.storage.get_download_action( + prefix, oid, size, self.action_lifetime, extra + ) + ) except exc.StorageError as e: - response['error'] = e.as_dict() + response["error"] = e.as_dict() - if response.get('actions', {}).get('download'): # type: ignore - response['authenticated'] = True + if response.get("actions", {}).get("download"): # type: ignore + response["authenticated"] = True return response @@ -75,11 +98,12 @@ def _check_object(self, prefix: str, oid: str, size: int): we want ObjectNotFound errors to be propagated if raised """ if self.storage.get_size(prefix, oid) != size: - raise exc.InvalidObject('Object 
size does not match') + raise exc.InvalidObject("Object size does not match") def factory(storage_class, storage_options, action_lifetime): - """Factory for basic transfer adapter with external storage - """ + """Factory for basic transfer adapter with external storage""" storage = get_callable(storage_class, __name__) - return BasicExternalBackendTransferAdapter(storage(**storage_options), action_lifetime) + return BasicExternalBackendTransferAdapter( + storage(**storage_options), action_lifetime + ) diff --git a/giftless/transfer/basic_streaming.py b/giftless/transfer/basic_streaming.py index 85636b1..624da0c 100644 --- a/giftless/transfer/basic_streaming.py +++ b/giftless/transfer/basic_streaming.py @@ -30,35 +30,39 @@ class VerifyView(BaseView): transfer adapters that need a 'verify' action as well. """ - route_base = '//objects/storage' + route_base = "//objects/storage" def __init__(self, storage: VerifiableStorage): self.storage = storage - @route('/verify', methods=['POST']) + @route("/verify", methods=["POST"]) def verify(self, organization, repo): schema = ObjectSchema(unknown=marshmallow.EXCLUDE) payload = parser.parse(schema) - self._check_authorization(organization, repo, Permission.READ_META, oid=payload['oid']) + self._check_authorization( + organization, repo, Permission.READ_META, oid=payload["oid"] + ) prefix = posixpath.join(organization, repo) - if not self.storage.verify_object(prefix, payload['oid'], payload['size']): + if not self.storage.verify_object(prefix, payload["oid"], payload["size"]): raise InvalidPayload("Object does not exist or size does not match") return Response(status=200) @classmethod - def get_verify_url(cls, organization: str, repo: str, oid: Optional[str] = None) -> str: - """Get the URL for upload / download requests for this object - """ - op_name = f'{cls.__name__}:verify' - url: str = url_for(op_name, organization=organization, repo=repo, oid=oid, _external=True) + def get_verify_url( + cls, organization: str, repo: 
str, oid: Optional[str] = None + ) -> str: + """Get the URL for upload / download requests for this object""" + op_name = f"{cls.__name__}:verify" + url: str = url_for( + op_name, organization=organization, repo=repo, oid=oid, _external=True + ) return url class ObjectsView(BaseView): - - route_base = '//objects/storage' + route_base = "//objects/storage" def __init__(self, storage: StreamingStorage): self.storage = storage @@ -73,105 +77,123 @@ def put(self, organization, repo, oid): """ self._check_authorization(organization, repo, Permission.WRITE, oid=oid) stream = request.stream - self.storage.put(prefix=f'{organization}/{repo}', oid=oid, data_stream=stream) + self.storage.put(prefix=f"{organization}/{repo}", oid=oid, data_stream=stream) return Response(status=200) def get(self, organization, repo, oid): - """Get an file open file stream from local storage - """ + """Get an file open file stream from local storage""" self._check_authorization(organization, repo, Permission.READ, oid=oid) path = posixpath.join(organization, repo) - filename = request.args.get('filename') + filename = request.args.get("filename") filename = safe_filename(filename) if filename else None - disposition = request.args.get('disposition') + disposition = request.args.get("disposition") headers = {} if filename and disposition: - headers = {'Content-Disposition': f'attachment; filename="{filename}"'} + headers = {"Content-Disposition": f'attachment; filename="{filename}"'} elif disposition: - headers = {'Content-Disposition': disposition} + headers = {"Content-Disposition": disposition} if self.storage.exists(path, oid): file = self.storage.get(path, oid) mime_type = self.storage.get_mime_type(path, oid) - headers['Content-Type'] = mime_type + headers["Content-Type"] = mime_type return Response(file, direct_passthrough=True, status=200, headers=headers) else: raise NotFound("The object was not found") @classmethod - def get_storage_url(cls, operation: str, organization: str, repo: str, 
oid: Optional[str] = None) -> str: - """Get the URL for upload / download requests for this object - """ - op_name = f'{cls.__name__}:{operation}' - url: str = url_for(op_name, organization=organization, repo=repo, oid=oid, _external=True) + def get_storage_url( + cls, operation: str, organization: str, repo: str, oid: Optional[str] = None + ) -> str: + """Get the URL for upload / download requests for this object""" + op_name = f"{cls.__name__}:{operation}" + url: str = url_for( + op_name, organization=organization, repo=repo, oid=oid, _external=True + ) return url class BasicStreamingTransferAdapter(PreAuthorizingTransferAdapter, ViewProvider): - def __init__(self, storage: StreamingStorage, action_lifetime: int): self.storage = storage self.action_lifetime = action_lifetime - def upload(self, organization: str, repo: str, oid: str, size: int, extra: Optional[Dict[str, Any]] = None) -> Dict: - response = {"oid": oid, - "size": size} + def upload( + self, + organization: str, + repo: str, + oid: str, + size: int, + extra: Optional[Dict[str, Any]] = None, + ) -> Dict: + response = {"oid": oid, "size": size} prefix = posixpath.join(organization, repo) - if not self.storage.exists(prefix, oid) or self.storage.get_size(prefix, oid) != size: - response['actions'] = { + if ( + not self.storage.exists(prefix, oid) + or self.storage.get_size(prefix, oid) != size + ): + response["actions"] = { "upload": { - "href": ObjectsView.get_storage_url('put', organization, repo, oid), - "header": self._preauth_headers(organization, repo, actions={'write'}, oid=oid), - "expires_in": self.action_lifetime + "href": ObjectsView.get_storage_url("put", organization, repo, oid), + "header": self._preauth_headers( + organization, repo, actions={"write"}, oid=oid + ), + "expires_in": self.action_lifetime, }, "verify": { "href": VerifyView.get_verify_url(organization, repo), - "header": self._preauth_headers(organization, repo, actions={'verify'}, oid=oid, - lifetime=self.VERIFY_LIFETIME), - 
"expires_in": self.VERIFY_LIFETIME - } + "header": self._preauth_headers( + organization, + repo, + actions={"verify"}, + oid=oid, + lifetime=self.VERIFY_LIFETIME, + ), + "expires_in": self.VERIFY_LIFETIME, + }, } - response['authenticated'] = True + response["authenticated"] = True return response - def download(self, organization: str, repo: str, oid: str, size: int, - extra: Optional[Dict[str, Any]] = None) -> Dict: - response = {"oid": oid, - "size": size} + def download( + self, + organization: str, + repo: str, + oid: str, + size: int, + extra: Optional[Dict[str, Any]] = None, + ) -> Dict: + response = {"oid": oid, "size": size} prefix = posixpath.join(organization, repo) if not self.storage.exists(prefix, oid): - response['error'] = { - "code": 404, - "message": "Object does not exist" - } + response["error"] = {"code": 404, "message": "Object does not exist"} elif self.storage.get_size(prefix, oid) != size: - response['error'] = { - "code": 422, - "message": "Object size does not match" - } + response["error"] = {"code": 422, "message": "Object size does not match"} else: - download_url = ObjectsView.get_storage_url('get', organization, repo, oid) - preauth_url = self._preauth_url(download_url, organization, repo, actions={'read'}, oid=oid) + download_url = ObjectsView.get_storage_url("get", organization, repo, oid) + preauth_url = self._preauth_url( + download_url, organization, repo, actions={"read"}, oid=oid + ) - if extra and 'filename' in extra: - params = {'filename': extra['filename']} + if extra and "filename" in extra: + params = {"filename": extra["filename"]} preauth_url = add_query_params(preauth_url, params) - response['actions'] = { + response["actions"] = { "download": { "href": preauth_url, "header": {}, - "expires_in": self.action_lifetime + "expires_in": self.action_lifetime, } } - response['authenticated'] = True + response["authenticated"] = True return response @@ -181,7 +203,6 @@ def register_views(self, app): def 
factory(storage_class, storage_options, action_lifetime): - """Factory for basic transfer adapter with local storage - """ + """Factory for basic transfer adapter with local storage""" storage = get_callable(storage_class, __name__) return BasicStreamingTransferAdapter(storage(**storage_options), action_lifetime) diff --git a/giftless/transfer/multipart.py b/giftless/transfer/multipart.py index 34ec52c..6415296 100644 --- a/giftless/transfer/multipart.py +++ b/giftless/transfer/multipart.py @@ -14,49 +14,75 @@ class MultipartTransferAdapter(PreAuthorizingTransferAdapter, ViewProvider): - - def __init__(self, storage: MultipartStorage, default_action_lifetime: int, max_part_size: int = DEFAULT_PART_SIZE): + def __init__( + self, + storage: MultipartStorage, + default_action_lifetime: int, + max_part_size: int = DEFAULT_PART_SIZE, + ): self.storage = storage self.max_part_size = max_part_size self.action_lifetime = default_action_lifetime - def upload(self, organization: str, repo: str, oid: str, size: int, extra: Optional[Dict[str, Any]] = None) -> Dict: + def upload( + self, + organization: str, + repo: str, + oid: str, + size: int, + extra: Optional[Dict[str, Any]] = None, + ) -> Dict: prefix = posixpath.join(organization, repo) - response = {"oid": oid, - "size": size} + response = {"oid": oid, "size": size} if self.storage.verify_object(prefix, oid, size): # No upload required, we already have this object return response - actions = self.storage.get_multipart_actions(prefix, oid, size, self.max_part_size, self.action_lifetime, extra) + actions = self.storage.get_multipart_actions( + prefix, oid, size, self.max_part_size, self.action_lifetime, extra + ) response.update(actions) - if response.get('actions'): - response['authenticated'] = True - headers = self._preauth_headers(organization, repo, actions={'verify'}, oid=oid, - lifetime=self.VERIFY_LIFETIME) - response['actions']['verify'] = { # type: ignore + if response.get("actions"): + response["authenticated"] 
= True + headers = self._preauth_headers( + organization, + repo, + actions={"verify"}, + oid=oid, + lifetime=self.VERIFY_LIFETIME, + ) + response["actions"]["verify"] = { # type: ignore "href": VerifyView.get_verify_url(organization, repo), "header": headers, - "expires_in": self.VERIFY_LIFETIME + "expires_in": self.VERIFY_LIFETIME, } return response - def download(self, organization: str, repo: str, oid: str, size: int, - extra: Optional[Dict[str, Any]] = None) -> Dict: + def download( + self, + organization: str, + repo: str, + oid: str, + size: int, + extra: Optional[Dict[str, Any]] = None, + ) -> Dict: prefix = posixpath.join(organization, repo) - response = {"oid": oid, - "size": size} + response = {"oid": oid, "size": size} try: self._check_object(prefix, oid, size) - response.update(self.storage.get_download_action(prefix, oid, size, self.action_lifetime, extra)) + response.update( + self.storage.get_download_action( + prefix, oid, size, self.action_lifetime, extra + ) + ) except exc.StorageError as e: - response['error'] = e.as_dict() + response["error"] = e.as_dict() - if response.get('actions', {}).get('download'): # type: ignore - response['authenticated'] = True + if response.get("actions", {}).get("download"): # type: ignore + response["authenticated"] = True return response @@ -73,15 +99,20 @@ def _check_object(self, prefix: str, oid: str, size: int): we want ObjectNotFound errors to be propagated if raised """ if self.storage.get_size(prefix, oid) != size: - raise exc.InvalidObject('Object size does not match') + raise exc.InvalidObject("Object size does not match") -def factory(storage_class, storage_options, action_lifetime: int = DEFAULT_ACTION_LIFETIME, - max_part_size: int = DEFAULT_PART_SIZE): - """Factory for multipart transfer adapter with storage - """ +def factory( + storage_class, + storage_options, + action_lifetime: int = DEFAULT_ACTION_LIFETIME, + max_part_size: int = DEFAULT_PART_SIZE, +): + """Factory for multipart transfer adapter 
with storage""" try: storage = get_callable(storage_class, __name__) except (AttributeError, ImportError): raise ValueError(f"Unable to load storage module: {storage_class}") - return MultipartTransferAdapter(storage(**storage_options), action_lifetime, max_part_size=max_part_size) + return MultipartTransferAdapter( + storage(**storage_options), action_lifetime, max_part_size=max_part_size + ) diff --git a/giftless/transfer/types.py b/giftless/transfer/types.py index f806d2a..4732952 100644 --- a/giftless/transfer/types.py +++ b/giftless/transfer/types.py @@ -10,8 +10,8 @@ class ObjectAttributes(TypedDict): - """Type for object attributes sent in batch request - """ + """Type for object attributes sent in batch request""" + oid: str size: int diff --git a/giftless/util.py b/giftless/util.py index 7a7870f..c25cc18 100644 --- a/giftless/util.py +++ b/giftless/util.py @@ -15,14 +15,16 @@ def get_callable(callable_str: str, base_package: Optional[str] = None) -> Calla >>> type(get_callable('basename', 'os.path')).__name__ 'function' """ - if ':' in callable_str: - module_name, callable_name = callable_str.split(':', 1) + if ":" in callable_str: + module_name, callable_name = callable_str.split(":", 1) module = importlib.import_module(module_name, base_package) elif base_package: module = importlib.import_module(base_package) callable_name = callable_str else: - raise ValueError("Expecting base_package to be set if only class name is provided") + raise ValueError( + "Expecting base_package to be set if only class name is provided" + ) return getattr(module, callable_name) # type: ignore @@ -66,8 +68,8 @@ def add_query_params(url: str, params: Dict[str, Any]) -> str: 'https://example.org?param1=value1¶m2=value2' """ urlencoded_params = urlencode(params) - separator = '&' if '?' in url else '?' - return f'{url}{separator}{urlencoded_params}' + separator = "&" if "?" in url else "?" 
+ return f"{url}{separator}{urlencoded_params}" def safe_filename(original_filename: str) -> str: @@ -80,5 +82,5 @@ def safe_filename(original_filename: str) -> str: >>> safe_filename("_ex@mple 2%.old.xlsx") '_exmple2.old.xlsx' """ - valid_chars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.' - return ''.join(c for c in original_filename if c in valid_chars) + valid_chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_." + return "".join(c for c in original_filename if c in valid_chars) diff --git a/giftless/view.py b/giftless/view.py index 3d0db74..013a4f9 100644 --- a/giftless/view.py +++ b/giftless/view.py @@ -17,64 +17,72 @@ class BaseView(FlaskView): decorators = [authn.login_required] - representations = {'application/json': representation.output_json, - representation.GIT_LFS_MIME_TYPE: representation.output_git_lfs_json, - 'flask-classful/default': representation.output_git_lfs_json} + representations = { + "application/json": representation.output_json, + representation.GIT_LFS_MIME_TYPE: representation.output_git_lfs_json, + "flask-classful/default": representation.output_git_lfs_json, + } trailing_slash = False @classmethod def register(cls, *args, **kwargs): - if kwargs.get('base_class') is None: - kwargs['base_class'] = BaseView + if kwargs.get("base_class") is None: + kwargs["base_class"] = BaseView return super().register(*args, **kwargs) @classmethod def _check_authorization(cls, organization, repo, permission, oid=None): - """Check the current user is authorized to perform an action and raise an exception otherwise - """ + """Check the current user is authorized to perform an action and raise an exception otherwise""" if not cls._is_authorized(organization, repo, permission, oid): raise exc.Forbidden("Your are not authorized to perform this action") @staticmethod def _is_authorized(organization, repo, permission, oid=None): - """Check the current user is authorized to perform an action - """ + """Check the 
current user is authorized to perform an action""" identity = authn.get_identity() return identity and identity.is_authorized(organization, repo, permission, oid) class BatchView(BaseView): - """Batch operations - """ - route_base = '//objects/batch' + """Batch operations""" + + route_base = "//objects/batch" def post(self, organization, repo): - """Batch operations - """ + """Batch operations""" payload = parser.parse(schema.batch_request_schema) try: - transfer_type, adapter = transfer.match_transfer_adapter(payload['transfers']) + transfer_type, adapter = transfer.match_transfer_adapter( + payload["transfers"] + ) except ValueError as e: raise exc.InvalidPayload(e) - permission = Permission.WRITE if payload['operation'] == schema.Operation.upload else Permission.READ + permission = ( + Permission.WRITE + if payload["operation"] == schema.Operation.upload + else Permission.READ + ) try: self._check_authorization(organization, repo, permission) except exc.Forbidden: # User doesn't have global permission to the entire namespace, but may be authorized for all objects - if not all(self._is_authorized(organization, repo, permission, o['oid']) for o in payload['objects']): + if not all( + self._is_authorized(organization, repo, permission, o["oid"]) + for o in payload["objects"] + ): raise response = {"transfer": transfer_type} - action = adapter.get_action(payload['operation'].value, organization, repo) - response['objects'] = [action(**o) for o in payload['objects']] + action = adapter.get_action(payload["operation"].value, organization, repo) + response["objects"] = [action(**o) for o in payload["objects"]] - if all(self._is_error(o, 404) for o in response['objects']): + if all(self._is_error(o, 404) for o in response["objects"]): raise exc.NotFound("Cannot find any of the requested objects") - if all(self._is_error(o) for o in response['objects']): + if all(self._is_error(o) for o in response["objects"]): raise exc.InvalidPayload("Cannot validate any of the 
requested objects") # TODO: Check Accept header @@ -85,7 +93,7 @@ def post(self, organization, repo): @staticmethod def _is_error(obj: Dict[str, Any], code: Optional[int] = None): try: - return obj['error']['code'] == code or code is None + return obj["error"]["code"] == code or code is None except KeyError: return False @@ -96,5 +104,6 @@ class ViewProvider: This allows transfer and storage backends to register routes for accessing or verifying files, for example, directly from the Giftless HTTP server. """ + def register_views(self, app): pass diff --git a/pytest.ini b/pytest.ini index cd15549..17f0948 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,5 +1,5 @@ [pytest] -addopts = --isort --mypy --doctest-modules +addopts = --mypy --doctest-modules testpaths = tests env = diff --git a/setup.py b/setup.py index 77216c5..91f2dee 100644 --- a/setup.py +++ b/setup.py @@ -1,25 +1,25 @@ from setuptools import find_packages, setup setup( - name='giftless', - packages=find_packages(exclude='./tests'), - version=open('VERSION').read(), - description='A Git LFS Server implementation in Python with support for pluggable backends', - author='Shahar Evron', - author_email='shahar.evron@datopian.com', - long_description=open('README.md').read(), - long_description_content_type='text/markdown', + name="giftless", + packages=find_packages(exclude="./tests"), + version=open("VERSION").read(), + description="A Git LFS Server implementation in Python with support for pluggable backends", + author="Shahar Evron", + author_email="shahar.evron@datopian.com", + long_description=open("README.md").read(), + long_description_content_type="text/markdown", install_requires=[ - 'figcan', - 'flask', - 'flask-marshmallow', - 'marshmallow-enum', - 'pyyaml', - 'PyJWT', - 'webargs', - 'python-dotenv', - 'typing-extensions', - 'flask-classful', + "figcan", + "flask", + "flask-marshmallow", + "marshmallow-enum", + "pyyaml", + "PyJWT", + "webargs", + "python-dotenv", + "typing-extensions", + 
"flask-classful", ], - include_package_data=True + include_package_data=True, ) diff --git a/tests/auth/test_auth.py b/tests/auth/test_auth.py index 24cc65b..b8b7948 100644 --- a/tests/auth/test_auth.py +++ b/tests/auth/test_auth.py @@ -7,83 +7,241 @@ def test_default_identity_properties(): - """Test the basic properties of the default identity object - """ - user = DefaultIdentity('arthur', 'kingofthebritons', 'arthur@camelot.gov.uk') - assert user.name == 'arthur' - assert user.id == 'kingofthebritons' - assert user.email == 'arthur@camelot.gov.uk' + """Test the basic properties of the default identity object""" + user = DefaultIdentity("arthur", "kingofthebritons", "arthur@camelot.gov.uk") + assert user.name == "arthur" + assert user.id == "kingofthebritons" + assert user.email == "arthur@camelot.gov.uk" -@pytest.mark.parametrize('requested', [ - ({"permission": Permission.READ, "organization": "myorg", "repo": "somerepo"}), - ({"permission": Permission.READ, "organization": "otherorg", "repo": "somerepo"}), - ({"permission": Permission.READ, "organization": "myorg", "repo": "somerepo", "oid": "foobar"}), - ({"permission": Permission.WRITE, "organization": "myorg", "repo": "somerepo"}), -]) +@pytest.mark.parametrize( + "requested", + [ + ({"permission": Permission.READ, "organization": "myorg", "repo": "somerepo"}), + ( + { + "permission": Permission.READ, + "organization": "otherorg", + "repo": "somerepo", + } + ), + ( + { + "permission": Permission.READ, + "organization": "myorg", + "repo": "somerepo", + "oid": "foobar", + } + ), + ({"permission": Permission.WRITE, "organization": "myorg", "repo": "somerepo"}), + ], +) def test_default_identity_denied_by_default(requested): - user = DefaultIdentity('arthur', 'kingofthebritons', 'arthur@camelot.gov.uk') + user = DefaultIdentity("arthur", "kingofthebritons", "arthur@camelot.gov.uk") assert user.is_authorized(**requested) is False -@pytest.mark.parametrize('requested, expected', [ - ({"permission": 
Permission.READ, "organization": "myorg", "repo": "somerepo"}, True), - ({"permission": Permission.READ, "organization": "otherorg", "repo": "somerepo"}, False), - ({"permission": Permission.READ, "organization": "myorg", "repo": "somerepo"}, True), - ({"permission": Permission.READ, "organization": "myorg", "repo": "somerepo", "oid": "someobject"}, True), - ({"permission": Permission.READ, "organization": "myorg", "repo": "otherrepo"}, False), -]) +@pytest.mark.parametrize( + "requested, expected", + [ + ( + { + "permission": Permission.READ, + "organization": "myorg", + "repo": "somerepo", + }, + True, + ), + ( + { + "permission": Permission.READ, + "organization": "otherorg", + "repo": "somerepo", + }, + False, + ), + ( + { + "permission": Permission.READ, + "organization": "myorg", + "repo": "somerepo", + }, + True, + ), + ( + { + "permission": Permission.READ, + "organization": "myorg", + "repo": "somerepo", + "oid": "someobject", + }, + True, + ), + ( + { + "permission": Permission.READ, + "organization": "myorg", + "repo": "otherrepo", + }, + False, + ), + ], +) def test_default_identity_allow_specific_repo(requested, expected): - user = DefaultIdentity('arthur', 'kingofthebritons', 'arthur@camelot.gov.uk') - user.allow(organization='myorg', repo='somerepo', permissions=Permission.all()) + user = DefaultIdentity("arthur", "kingofthebritons", "arthur@camelot.gov.uk") + user.allow(organization="myorg", repo="somerepo", permissions=Permission.all()) assert expected is user.is_authorized(**requested) -@pytest.mark.parametrize('requested, expected', [ - ({"permission": Permission.READ, "organization": "otherorg", "repo": "somerepo"}, False), - ({"permission": Permission.READ, "organization": "myorg", "repo": "somerepo"}, True), - ({"permission": Permission.READ_META, "organization": "myorg", "repo": "somerepo"}, True), - ({"permission": Permission.WRITE, "organization": "myorg", "repo": "somerepo"}, False), - ({"permission": Permission.READ, "organization": 
"myorg", "repo": "otherrepo"}, True), - ({"permission": Permission.WRITE, "organization": "myorg", "repo": "otherrepo"}, False), -]) +@pytest.mark.parametrize( + "requested, expected", + [ + ( + { + "permission": Permission.READ, + "organization": "otherorg", + "repo": "somerepo", + }, + False, + ), + ( + { + "permission": Permission.READ, + "organization": "myorg", + "repo": "somerepo", + }, + True, + ), + ( + { + "permission": Permission.READ_META, + "organization": "myorg", + "repo": "somerepo", + }, + True, + ), + ( + { + "permission": Permission.WRITE, + "organization": "myorg", + "repo": "somerepo", + }, + False, + ), + ( + { + "permission": Permission.READ, + "organization": "myorg", + "repo": "otherrepo", + }, + True, + ), + ( + { + "permission": Permission.WRITE, + "organization": "myorg", + "repo": "otherrepo", + }, + False, + ), + ], +) def test_default_identity_allow_specific_org_permissions(requested, expected): - user = DefaultIdentity('arthur', 'kingofthebritons', 'arthur@camelot.gov.uk') - user.allow(organization='myorg', permissions={Permission.READ_META, Permission.READ}) + user = DefaultIdentity("arthur", "kingofthebritons", "arthur@camelot.gov.uk") + user.allow( + organization="myorg", permissions={Permission.READ_META, Permission.READ} + ) assert expected is user.is_authorized(**requested) -@pytest.mark.parametrize('requested, expected', [ - ({"organization": "myorg", "repo": "myrepo", "permission": Permission.READ}, True), - ({"organization": "otherorg", "repo": "otherrepo", "permission": Permission.READ}, True), - ({"organization": "otherorg", "repo": "otherrepo", "permission": Permission.READ_META}, True), - ({"organization": "myorg", "repo": "myrepo", "permission": Permission.WRITE}, False), - ({"organization": "otherorg", "repo": "otherrepo", "permission": Permission.WRITE}, False), -]) +@pytest.mark.parametrize( + "requested, expected", + [ + ( + {"organization": "myorg", "repo": "myrepo", "permission": Permission.READ}, + True, + ), + ( 
+ { + "organization": "otherorg", + "repo": "otherrepo", + "permission": Permission.READ, + }, + True, + ), + ( + { + "organization": "otherorg", + "repo": "otherrepo", + "permission": Permission.READ_META, + }, + True, + ), + ( + {"organization": "myorg", "repo": "myrepo", "permission": Permission.WRITE}, + False, + ), + ( + { + "organization": "otherorg", + "repo": "otherrepo", + "permission": Permission.WRITE, + }, + False, + ), + ], +) def test_allow_anon_read_only(requested, expected): - """Test that an anon user with read only permissions works as expected - """ + """Test that an anon user with read only permissions works as expected""" user = allow_anon.read_only(None) assert expected is user.is_authorized(**requested) -@pytest.mark.parametrize('requested, expected', [ - ({"organization": "myorg", "repo": "myrepo", "permission": Permission.READ}, True), - ({"organization": "otherorg", "repo": "otherrepo", "permission": Permission.READ}, True), - ({"organization": "otherorg", "repo": "otherrepo", "permission": Permission.READ_META}, True), - ({"organization": "myorg", "repo": "myrepo", "permission": Permission.WRITE}, True), - ({"organization": "otherorg", "repo": "otherrepo", "permission": Permission.WRITE}, True), -]) +@pytest.mark.parametrize( + "requested, expected", + [ + ( + {"organization": "myorg", "repo": "myrepo", "permission": Permission.READ}, + True, + ), + ( + { + "organization": "otherorg", + "repo": "otherrepo", + "permission": Permission.READ, + }, + True, + ), + ( + { + "organization": "otherorg", + "repo": "otherrepo", + "permission": Permission.READ_META, + }, + True, + ), + ( + {"organization": "myorg", "repo": "myrepo", "permission": Permission.WRITE}, + True, + ), + ( + { + "organization": "otherorg", + "repo": "otherrepo", + "permission": Permission.WRITE, + }, + True, + ), + ], +) def test_allow_anon_read_write(requested, expected): - """Test that an anon user with read only permissions works as expected - """ + """Test that an anon 
user with read only permissions works as expected""" user = allow_anon.read_write(None) assert expected is user.is_authorized(**requested) def test_anon_user_interface(): - """Test that an anonymous user has the right interface - """ + """Test that an anonymous user has the right interface""" user = allow_anon.read_only(None) assert isinstance(user, allow_anon.AnonymousUser) - assert user.name == 'anonymous' + assert user.name == "anonymous" diff --git a/tests/auth/test_jwt.py b/tests/auth/test_jwt.py index f3dedec..c512cb3 100644 --- a/tests/auth/test_jwt.py +++ b/tests/auth/test_jwt.py @@ -12,252 +12,484 @@ from giftless.auth.jwt import JWTAuthenticator, Scope, factory # Symmetric key used in tests -JWT_HS_KEY = b'some-random-secret' +JWT_HS_KEY = b"some-random-secret" # Asymmetric key files used in tests -JWT_RS_PRI_KEY = os.path.join(os.path.dirname(__file__), 'data', 'test-key.pem') -JWT_RS_PUB_KEY = os.path.join(os.path.dirname(__file__), 'data', 'test-key.pub.pem') +JWT_RS_PRI_KEY = os.path.join(os.path.dirname(__file__), "data", "test-key.pem") +JWT_RS_PUB_KEY = os.path.join(os.path.dirname(__file__), "data", "test-key.pub.pem") def test_jwt_can_authorize_request_symmetric_key(app): - """Test basic JWT authorizer functionality - """ - authz = JWTAuthenticator(private_key=JWT_HS_KEY, algorithm='HS256') + """Test basic JWT authorizer functionality""" + authz = JWTAuthenticator(private_key=JWT_HS_KEY, algorithm="HS256") token = _get_test_token() - with app.test_request_context('/myorg/myrepo/objects/batch', method='POST', headers={ - "Authorization": f'Bearer {token}' - }): + with app.test_request_context( + "/myorg/myrepo/objects/batch", + method="POST", + headers={"Authorization": f"Bearer {token}"}, + ): identity = authz(flask.request) - assert identity.id == 'some-user-id' + assert identity.id == "some-user-id" def test_jwt_can_authorize_request_asymmetric_key(app): - """Test basic JWT authorizer functionality - """ - authz = 
factory(public_key_file=JWT_RS_PUB_KEY, algorithm='RS256') - token = _get_test_token(algo='RS256') - with app.test_request_context('/myorg/myrepo/objects/batch', method='POST', headers={ - "Authorization": f'Bearer {token}' - }): + """Test basic JWT authorizer functionality""" + authz = factory(public_key_file=JWT_RS_PUB_KEY, algorithm="RS256") + token = _get_test_token(algo="RS256") + with app.test_request_context( + "/myorg/myrepo/objects/batch", + method="POST", + headers={"Authorization": f"Bearer {token}"}, + ): identity = authz(flask.request) - assert identity.id == 'some-user-id' + assert identity.id == "some-user-id" def test_jwt_can_authorize_request_token_in_qs(app): - """Test basic JWT authorizer functionality - """ - authz = JWTAuthenticator(private_key=JWT_HS_KEY, algorithm='HS256') + """Test basic JWT authorizer functionality""" + authz = JWTAuthenticator(private_key=JWT_HS_KEY, algorithm="HS256") token = _get_test_token() - with app.test_request_context(f'/myorg/myrepo/objects/batch?jwt={token}', method='POST'): + with app.test_request_context( + f"/myorg/myrepo/objects/batch?jwt={token}", method="POST" + ): identity = authz(flask.request) - assert identity.id == 'some-user-id' + assert identity.id == "some-user-id" def test_jwt_can_authorize_request_token_as_basic_password(app): - """Test that we can pass a JWT token as 'Basic' authorization password - """ - authz = JWTAuthenticator(private_key=JWT_HS_KEY, algorithm='HS256') + """Test that we can pass a JWT token as 'Basic' authorization password""" + authz = JWTAuthenticator(private_key=JWT_HS_KEY, algorithm="HS256") token = _get_test_token() - auth_value = base64.b64encode(b':'.join([b'_jwt', token.encode('ascii')])).decode('ascii') - - with app.test_request_context('/myorg/myrepo/objects/batch', method='POST', headers={ - "Authorization": f'Basic {auth_value}' - }): + auth_value = base64.b64encode(b":".join([b"_jwt", token.encode("ascii")])).decode( + "ascii" + ) + + with 
app.test_request_context( + "/myorg/myrepo/objects/batch", + method="POST", + headers={"Authorization": f"Basic {auth_value}"}, + ): identity = authz(flask.request) - assert identity.id == 'some-user-id' + assert identity.id == "some-user-id" def test_jwt_can_authorize_request_token_basic_password_disabled(app): - """Test that we can pass a JWT token as 'Basic' authorization password - """ - authz = JWTAuthenticator(private_key=JWT_HS_KEY, algorithm='HS256', basic_auth_user=None) + """Test that we can pass a JWT token as 'Basic' authorization password""" + authz = JWTAuthenticator( + private_key=JWT_HS_KEY, algorithm="HS256", basic_auth_user=None + ) token = _get_test_token() - auth_value = base64.b64encode(b':'.join([b'_jwt', token.encode('ascii')])).decode('ascii') - - with app.test_request_context('/myorg/myrepo/objects/batch', method='POST', headers={ - "Authorization": f'Basic {auth_value}' - }): + auth_value = base64.b64encode(b":".join([b"_jwt", token.encode("ascii")])).decode( + "ascii" + ) + + with app.test_request_context( + "/myorg/myrepo/objects/batch", + method="POST", + headers={"Authorization": f"Basic {auth_value}"}, + ): identity = authz(flask.request) assert None is identity def test_jwt_with_wrong_kid_doesnt_authorize_request(app): - """JWT authorizer only considers a JWT token if it has the right key ID in the header - """ - authz = JWTAuthenticator(private_key=JWT_HS_KEY, algorithm='HS256', key_id='must-be-this-key') + """JWT authorizer only considers a JWT token if it has the right key ID in the header""" + authz = JWTAuthenticator( + private_key=JWT_HS_KEY, algorithm="HS256", key_id="must-be-this-key" + ) token = _get_test_token() - with app.test_request_context('/myorg/myrepo/objects/batch', method='POST', headers={ - "Authorization": f'Bearer {token}' - }): + with app.test_request_context( + "/myorg/myrepo/objects/batch", + method="POST", + headers={"Authorization": f"Bearer {token}"}, + ): identity = authz(flask.request) assert None is 
identity def test_jwt_expired_throws_401(app): - """If we get a JWT token who's expired, we should raise a 401 error - """ - authz = JWTAuthenticator(private_key=JWT_HS_KEY, algorithm='HS256') + """If we get a JWT token who's expired, we should raise a 401 error""" + authz = JWTAuthenticator(private_key=JWT_HS_KEY, algorithm="HS256") token = _get_test_token(lifetime=-600) # expired 10 minutes ago - with app.test_request_context('/myorg/myrepo/objects/batch', method='POST', headers={ - "Authorization": f'Bearer {token}' - }): + with app.test_request_context( + "/myorg/myrepo/objects/batch", + method="POST", + headers={"Authorization": f"Bearer {token}"}, + ): with pytest.raises(Unauthorized): authz(flask.request) def test_jwt_pre_authorize_action(): - authz = JWTAuthenticator(private_key=JWT_HS_KEY, algorithm='HS256', default_lifetime=120) - identity = DefaultIdentity(name='joe', email='joe@shmoe.com', id='babab0ba') - header = authz.get_authz_header(identity, 'myorg', 'somerepo', actions={'read'}) + authz = JWTAuthenticator( + private_key=JWT_HS_KEY, algorithm="HS256", default_lifetime=120 + ) + identity = DefaultIdentity(name="joe", email="joe@shmoe.com", id="babab0ba") + header = authz.get_authz_header(identity, "myorg", "somerepo", actions={"read"}) - auth_type, token = header['Authorization'].split(' ') - assert 'Bearer' == auth_type + auth_type, token = header["Authorization"].split(" ") + assert "Bearer" == auth_type - payload = jwt.decode(token, JWT_HS_KEY, algorithms='HS256') - assert payload['sub'] == 'babab0ba' - assert payload['scopes'] == 'obj:myorg/somerepo/*:read' + payload = jwt.decode(token, JWT_HS_KEY, algorithms="HS256") + assert payload["sub"] == "babab0ba" + assert payload["scopes"] == "obj:myorg/somerepo/*:read" # Check that now() - expiration time is within 5 seconds of 120 seconds - assert abs((datetime.fromtimestamp(payload['exp']) - datetime.now()).seconds - 120) < 5 + assert ( + abs((datetime.fromtimestamp(payload["exp"]) - 
datetime.now()).seconds - 120) < 5 + ) def test_jwt_pre_authorize_action_custom_lifetime(): - authz = JWTAuthenticator(private_key=JWT_HS_KEY, algorithm='HS256', default_lifetime=120) - identity = DefaultIdentity(name='joe', email='joe@shmoe.com', id='babab0ba') - header = authz.get_authz_header(identity, 'myorg', 'somerepo', actions={'read'}, lifetime=3600) + authz = JWTAuthenticator( + private_key=JWT_HS_KEY, algorithm="HS256", default_lifetime=120 + ) + identity = DefaultIdentity(name="joe", email="joe@shmoe.com", id="babab0ba") + header = authz.get_authz_header( + identity, "myorg", "somerepo", actions={"read"}, lifetime=3600 + ) - auth_type, token = header['Authorization'].split(' ') - assert 'Bearer' == auth_type + auth_type, token = header["Authorization"].split(" ") + assert "Bearer" == auth_type - payload = jwt.decode(token, JWT_HS_KEY, algorithms='HS256') - assert payload['sub'] == 'babab0ba' - assert payload['scopes'] == 'obj:myorg/somerepo/*:read' + payload = jwt.decode(token, JWT_HS_KEY, algorithms="HS256") + assert payload["sub"] == "babab0ba" + assert payload["scopes"] == "obj:myorg/somerepo/*:read" # Check that now() - expiration time is within 5 seconds of 3600 seconds - assert abs((datetime.fromtimestamp(payload['exp']) - datetime.now()).seconds - 3600) < 5 - - -@pytest.mark.parametrize('scopes, auth_check, expected', [ - ([], - {"organization": "myorg", "repo": "myrepo", "permission": Permission.READ}, False), - (['blah:foo/bar:*'], - {"organization": "myorg", "repo": "myrepo", "permission": Permission.READ}, False), - (['obj:myorg/myrepo/*'], - {"organization": "myorg", "repo": "myrepo", "permission": Permission.READ}, True), - (['obj:myorg/myrepo/*'], - {"organization": "myorg", "repo": "myrepo", "permission": Permission.WRITE}, True), - (['obj:myorg/otherrepo/*'], - {"organization": "myorg", "repo": "myrepo", "permission": Permission.READ}, False), - (['obj:myorg/myrepo/*'], - {"organization": "myorg", "repo": "myrepo", "permission": 
Permission.READ}, True), - (['obj:myorg/myrepo/*:read'], - {"organization": "myorg", "repo": "myrepo", "permission": Permission.WRITE}, False), - (['obj:myorg/myrepo/*:write'], - {"organization": "myorg", "repo": "myrepo", "permission": Permission.WRITE}, True), - (['obj:myorg/myrepo/*:read,write'], - {"organization": "myorg", "repo": "myrepo", "permission": Permission.WRITE}, True), - (['obj:myorg/myrepo/*:read,verify'], - {"organization": "myorg", "repo": "myrepo", "permission": Permission.READ_META}, True), - (['obj:myorg/myrepo/*:read'], - {"organization": "myorg", "repo": "myrepo", "permission": Permission.READ_META}, True), - (['obj:myorg/myrepo/*:meta:*'], - {"organization": "myorg", "repo": "myrepo", "permission": Permission.READ_META}, True), - (['obj:myorg/myrepo/*:meta:read,write,verify'], - {"organization": "myorg", "repo": "myrepo", "permission": Permission.READ}, False), - ('obj:myorg/myrepo/*:meta:*', - {"organization": "myorg", "repo": "myrepo", "permission": Permission.READ_META}, True), - ('obj:myorg/*/*:read', - {"organization": "myorg", "repo": "myrepo", "permission": Permission.READ}, True), - ('obj:myorg/*/*:read', - {"organization": "otherorg", "repo": "myrepo", "permission": Permission.READ}, False), - ('obj:myorg/*/*:read', - {"organization": "myorg", "repo": "myrepo", "permission": Permission.WRITE}, False), - ('obj:*/*/*:read', - {"organization": "myorg", "repo": "myrepo", "permission": Permission.READ}, True), - ('obj:*/*/*:read', - {"organization": "otherorg", "repo": "myrepo", "permission": Permission.READ}, True), - ('obj:*/*/someobjectid:read', - {"organization": "otherorg", "repo": "myrepo", "permission": Permission.READ}, False), - ('obj:*/*/someobjectid:read', - {"organization": "otherorg", "repo": "myrepo", "oid": "someobjectid", "permission": Permission.READ}, True), - ('obj:*/*/someobjectid:read', - {"organization": "otherorg", "repo": "myrepo", "oid": "otherobjectid", "permission": Permission.READ}, False), - 
('obj:someobjectid:read', - {"organization": "myorg", "repo": "anonrelatedrepo", "oid": "someobjectid", "permission": Permission.READ}, True), -]) + assert ( + abs((datetime.fromtimestamp(payload["exp"]) - datetime.now()).seconds - 3600) + < 5 + ) + + +@pytest.mark.parametrize( + "scopes, auth_check, expected", + [ + ( + [], + {"organization": "myorg", "repo": "myrepo", "permission": Permission.READ}, + False, + ), + ( + ["blah:foo/bar:*"], + {"organization": "myorg", "repo": "myrepo", "permission": Permission.READ}, + False, + ), + ( + ["obj:myorg/myrepo/*"], + {"organization": "myorg", "repo": "myrepo", "permission": Permission.READ}, + True, + ), + ( + ["obj:myorg/myrepo/*"], + {"organization": "myorg", "repo": "myrepo", "permission": Permission.WRITE}, + True, + ), + ( + ["obj:myorg/otherrepo/*"], + {"organization": "myorg", "repo": "myrepo", "permission": Permission.READ}, + False, + ), + ( + ["obj:myorg/myrepo/*"], + {"organization": "myorg", "repo": "myrepo", "permission": Permission.READ}, + True, + ), + ( + ["obj:myorg/myrepo/*:read"], + {"organization": "myorg", "repo": "myrepo", "permission": Permission.WRITE}, + False, + ), + ( + ["obj:myorg/myrepo/*:write"], + {"organization": "myorg", "repo": "myrepo", "permission": Permission.WRITE}, + True, + ), + ( + ["obj:myorg/myrepo/*:read,write"], + {"organization": "myorg", "repo": "myrepo", "permission": Permission.WRITE}, + True, + ), + ( + ["obj:myorg/myrepo/*:read,verify"], + { + "organization": "myorg", + "repo": "myrepo", + "permission": Permission.READ_META, + }, + True, + ), + ( + ["obj:myorg/myrepo/*:read"], + { + "organization": "myorg", + "repo": "myrepo", + "permission": Permission.READ_META, + }, + True, + ), + ( + ["obj:myorg/myrepo/*:meta:*"], + { + "organization": "myorg", + "repo": "myrepo", + "permission": Permission.READ_META, + }, + True, + ), + ( + ["obj:myorg/myrepo/*:meta:read,write,verify"], + {"organization": "myorg", "repo": "myrepo", "permission": Permission.READ}, + False, + ), + ( 
+ "obj:myorg/myrepo/*:meta:*", + { + "organization": "myorg", + "repo": "myrepo", + "permission": Permission.READ_META, + }, + True, + ), + ( + "obj:myorg/*/*:read", + {"organization": "myorg", "repo": "myrepo", "permission": Permission.READ}, + True, + ), + ( + "obj:myorg/*/*:read", + { + "organization": "otherorg", + "repo": "myrepo", + "permission": Permission.READ, + }, + False, + ), + ( + "obj:myorg/*/*:read", + {"organization": "myorg", "repo": "myrepo", "permission": Permission.WRITE}, + False, + ), + ( + "obj:*/*/*:read", + {"organization": "myorg", "repo": "myrepo", "permission": Permission.READ}, + True, + ), + ( + "obj:*/*/*:read", + { + "organization": "otherorg", + "repo": "myrepo", + "permission": Permission.READ, + }, + True, + ), + ( + "obj:*/*/someobjectid:read", + { + "organization": "otherorg", + "repo": "myrepo", + "permission": Permission.READ, + }, + False, + ), + ( + "obj:*/*/someobjectid:read", + { + "organization": "otherorg", + "repo": "myrepo", + "oid": "someobjectid", + "permission": Permission.READ, + }, + True, + ), + ( + "obj:*/*/someobjectid:read", + { + "organization": "otherorg", + "repo": "myrepo", + "oid": "otherobjectid", + "permission": Permission.READ, + }, + False, + ), + ( + "obj:someobjectid:read", + { + "organization": "myorg", + "repo": "anonrelatedrepo", + "oid": "someobjectid", + "permission": Permission.READ, + }, + True, + ), + ], +) def test_jwt_scopes_authorize_actions(app, scopes, auth_check, expected): - """Test that JWT token scopes can control authorization - """ - authz = JWTAuthenticator(private_key=JWT_HS_KEY, algorithm='HS256') + """Test that JWT token scopes can control authorization""" + authz = JWTAuthenticator(private_key=JWT_HS_KEY, algorithm="HS256") token = _get_test_token(scopes=scopes) - with app.test_request_context('/myorg/myrepo/objects/batch', method='POST', headers={ - "Authorization": f'Bearer {token}' - }): + with app.test_request_context( + "/myorg/myrepo/objects/batch", + method="POST", + 
headers={"Authorization": f"Bearer {token}"}, + ): identity = authz(flask.request) assert identity.is_authorized(**auth_check) is expected def test_jwt_scopes_authorize_actions_with_anon_user(app): - """Test that authorization works even if we don't have any user ID / email / name - """ - scopes = ['obj:myorg/myrepo/*'] - authz = JWTAuthenticator(private_key=JWT_HS_KEY, algorithm='HS256') + """Test that authorization works even if we don't have any user ID / email / name""" + scopes = ["obj:myorg/myrepo/*"] + authz = JWTAuthenticator(private_key=JWT_HS_KEY, algorithm="HS256") token = _get_test_token(scopes=scopes, sub=None, name=None, email=None) - with app.test_request_context('/myorg/myrepo/objects/batch', method='POST', headers={ - "Authorization": f'Bearer {token}' - }): + with app.test_request_context( + "/myorg/myrepo/objects/batch", + method="POST", + headers={"Authorization": f"Bearer {token}"}, + ): identity = authz(flask.request) - assert identity.is_authorized(organization='myorg', repo='myrepo', permission=Permission.READ) - - -@pytest.mark.parametrize('scope_str, expected', [ - ('org:myorg:*', {'entity_type': 'org', 'entity_ref': 'myorg', 'actions': None, 'subscope': None}), - ('org:myorg', {'entity_type': 'org', 'entity_ref': 'myorg', 'actions': None, 'subscope': None}), - ('ds', {'entity_type': 'ds', 'entity_ref': None, 'actions': None, 'subscope': None}), - ('ds:*', {'entity_type': 'ds', 'entity_ref': None, 'actions': None, 'subscope': None}), - ('ds:*:read', {'entity_type': 'ds', 'entity_ref': None, 'actions': {'read'}, 'subscope': None}), - ('ds:foobaz:meta:read', {'entity_type': 'ds', 'entity_ref': 'foobaz', 'actions': {'read'}, 'subscope': 'meta'}), - ('ds:foobaz:*:read', {'entity_type': 'ds', 'entity_ref': 'foobaz', 'actions': {'read'}, 'subscope': None}), - ('ds:foobaz:meta:*', {'entity_type': 'ds', 'entity_ref': 'foobaz', 'actions': None, 'subscope': 'meta'}), - ('ds:foobaz:delete', {'entity_type': 'ds', 'entity_ref': 'foobaz', 'actions': 
{'delete'}, 'subscope': None}), - ('ds:foobaz:create,delete', {'entity_type': 'ds', 'entity_ref': 'foobaz', 'actions': {'create', 'delete'}, - 'subscope': None}), - -]) + assert identity.is_authorized( + organization="myorg", repo="myrepo", permission=Permission.READ + ) + + +@pytest.mark.parametrize( + "scope_str, expected", + [ + ( + "org:myorg:*", + { + "entity_type": "org", + "entity_ref": "myorg", + "actions": None, + "subscope": None, + }, + ), + ( + "org:myorg", + { + "entity_type": "org", + "entity_ref": "myorg", + "actions": None, + "subscope": None, + }, + ), + ( + "ds", + { + "entity_type": "ds", + "entity_ref": None, + "actions": None, + "subscope": None, + }, + ), + ( + "ds:*", + { + "entity_type": "ds", + "entity_ref": None, + "actions": None, + "subscope": None, + }, + ), + ( + "ds:*:read", + { + "entity_type": "ds", + "entity_ref": None, + "actions": {"read"}, + "subscope": None, + }, + ), + ( + "ds:foobaz:meta:read", + { + "entity_type": "ds", + "entity_ref": "foobaz", + "actions": {"read"}, + "subscope": "meta", + }, + ), + ( + "ds:foobaz:*:read", + { + "entity_type": "ds", + "entity_ref": "foobaz", + "actions": {"read"}, + "subscope": None, + }, + ), + ( + "ds:foobaz:meta:*", + { + "entity_type": "ds", + "entity_ref": "foobaz", + "actions": None, + "subscope": "meta", + }, + ), + ( + "ds:foobaz:delete", + { + "entity_type": "ds", + "entity_ref": "foobaz", + "actions": {"delete"}, + "subscope": None, + }, + ), + ( + "ds:foobaz:create,delete", + { + "entity_type": "ds", + "entity_ref": "foobaz", + "actions": {"create", "delete"}, + "subscope": None, + }, + ), + ], +) def test_scope_parsing(scope_str, expected): - """Test scope string parsing works as expected - """ + """Test scope string parsing works as expected""" scope = Scope.from_string(scope_str) for k, v in expected.items(): assert getattr(scope, k) == v -@pytest.mark.parametrize('scope, expected', [ - (Scope('org', 'myorg'), 'org:myorg'), - (Scope('org', 'myorg', subscope='meta'), 
'org:myorg:meta:*'), - (Scope('ds'), 'ds'), - (Scope('ds', 'foobaz', {'read'}), 'ds:foobaz:read'), - (Scope('ds', 'foobaz', {'read'}, 'meta'), 'ds:foobaz:meta:read'), - (Scope('ds', actions={'read'}, subscope='meta'), 'ds:*:meta:read'), - (Scope('ds', actions={'read', 'write'}, subscope='meta'), 'ds:*:meta:read,write'), -]) +@pytest.mark.parametrize( + "scope, expected", + [ + (Scope("org", "myorg"), "org:myorg"), + (Scope("org", "myorg", subscope="meta"), "org:myorg:meta:*"), + (Scope("ds"), "ds"), + (Scope("ds", "foobaz", {"read"}), "ds:foobaz:read"), + (Scope("ds", "foobaz", {"read"}, "meta"), "ds:foobaz:meta:read"), + (Scope("ds", actions={"read"}, subscope="meta"), "ds:*:meta:read"), + ( + Scope("ds", actions={"read", "write"}, subscope="meta"), + "ds:*:meta:read,write", + ), + ], +) def test_scope_stringify(scope, expected): - """Test scope stringification works as expected - """ + """Test scope stringification works as expected""" assert expected == str(scope) -def _get_test_token(lifetime=300, headers=None, algo='HS256', **kwargs): - payload = {"exp": datetime.now(tz=pytz.utc) + timedelta(seconds=lifetime), - "sub": 'some-user-id'} +def _get_test_token(lifetime=300, headers=None, algo="HS256", **kwargs): + payload = { + "exp": datetime.now(tz=pytz.utc) + timedelta(seconds=lifetime), + "sub": "some-user-id", + } payload.update(kwargs) - if algo == 'HS256': + if algo == "HS256": key = JWT_HS_KEY - elif algo == 'RS256': + elif algo == "RS256": with open(JWT_RS_PRI_KEY) as f: key = f.read() else: diff --git a/tests/conftest.py b/tests/conftest.py index 30c59a1..16cf975 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -19,21 +19,16 @@ def storage_path(tmp_path): @pytest.fixture() def app(storage_path): - """Session fixture to configure the Flask app - """ - app = init_app(additional_config={ - "TESTING": True, - "TRANSFER_ADAPTERS": { - "basic": { - "options": { - "storage_options": { - "path": storage_path - } - } - } + """Session fixture to configure 
the Flask app""" + app = init_app( + additional_config={ + "TESTING": True, + "TRANSFER_ADAPTERS": { + "basic": {"options": {"storage_options": {"path": storage_path}}} + }, } - }) - app.config.update({"SERVER_NAME": 'giftless.local'}) + ) + app.config.update({"SERVER_NAME": "giftless.local"}) return app @@ -54,7 +49,9 @@ def test_client(app_context: AppContext): @pytest.fixture() -def authz_full_access(app_context): # needed to ensure we call init_authenticators before app context is destroyed +def authz_full_access( + app_context, +): # needed to ensure we call init_authenticators before app context is destroyed """Fixture that enables full anonymous access to all actions for tests that use it """ diff --git a/tests/helpers.py b/tests/helpers.py index 5b20643..1613247 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -4,18 +4,12 @@ def batch_request_payload(delete_keys=(), **kwargs): - """Generate sample batch request payload - """ + """Generate sample batch request payload""" payload = { "operation": "download", "transfers": ["basic"], "ref": {"name": "refs/heads/master"}, - "objects": [ - { - "oid": "12345678", - "size": 8 - } - ] + "objects": [{"oid": "12345678", "size": 8}], } for key in delete_keys: @@ -35,6 +29,6 @@ def create_file_in_storage(storage_path, org, repo, filename, size=1): """ repo_path = os.path.join(storage_path, org, repo) os.makedirs(repo_path, exist_ok=True) - with open(os.path.join(repo_path, filename), 'wb') as f: - for c in (b'0' for _ in range(size)): + with open(os.path.join(repo_path, filename), "wb") as f: + for c in (b"0" for _ in range(size)): f.write(c) diff --git a/tests/storage/__init__.py b/tests/storage/__init__.py index fcfcf74..122261f 100644 --- a/tests/storage/__init__.py +++ b/tests/storage/__init__.py @@ -5,7 +5,7 @@ from giftless.storage import ExternalStorage, StreamingStorage from giftless.storage.exc import ObjectNotFound -ARBITRARY_OID = '2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824' 
+ARBITRARY_OID = "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824" class _CommonStorageAbstractTests: @@ -13,30 +13,27 @@ class _CommonStorageAbstractTests: This should not be used directly, because it is inherited by other AbstractTest test suites. """ + def test_get_size(self, storage_backend): - """Test getting the size of a stored object - """ - content = b'The contents of a file-like object' - storage_backend.put('org/repo', ARBITRARY_OID, io.BytesIO(content)) - assert len(content) == storage_backend.get_size('org/repo', ARBITRARY_OID) + """Test getting the size of a stored object""" + content = b"The contents of a file-like object" + storage_backend.put("org/repo", ARBITRARY_OID, io.BytesIO(content)) + assert len(content) == storage_backend.get_size("org/repo", ARBITRARY_OID) def test_get_size_not_existing(self, storage_backend): - """Test getting the size of a non-existing object raises an exception - """ + """Test getting the size of a non-existing object raises an exception""" with pytest.raises(ObjectNotFound): - storage_backend.get_size('org/repo', ARBITRARY_OID) + storage_backend.get_size("org/repo", ARBITRARY_OID) def test_exists_exists(self, storage_backend: StreamingStorage): - """Test that calling exists on an existing object returns True - """ - content = b'The contents of a file-like object' - storage_backend.put('org/repo', ARBITRARY_OID, io.BytesIO(content)) - assert storage_backend.exists('org/repo', ARBITRARY_OID) + """Test that calling exists on an existing object returns True""" + content = b"The contents of a file-like object" + storage_backend.put("org/repo", ARBITRARY_OID, io.BytesIO(content)) + assert storage_backend.exists("org/repo", ARBITRARY_OID) def test_exists_not_exists(self, storage_backend: StreamingStorage): - """Test that calling exists on a non-existing object returns False - """ - assert not storage_backend.exists('org/repo', ARBITRARY_OID) + """Test that calling exists on a non-existing object returns 
False""" + assert not storage_backend.exists("org/repo", ARBITRARY_OID) class _VerifiableStorageAbstractTests: @@ -44,21 +41,26 @@ class _VerifiableStorageAbstractTests: This should not be used directly, because it is inherited by other AbstractTest test suites. """ + def test_verify_object_ok(self, storage_backend): - content = b'The contents of a file-like object' - storage_backend.put('org/repo', ARBITRARY_OID, io.BytesIO(content)) - assert storage_backend.verify_object('org/repo', ARBITRARY_OID, len(content)) + content = b"The contents of a file-like object" + storage_backend.put("org/repo", ARBITRARY_OID, io.BytesIO(content)) + assert storage_backend.verify_object("org/repo", ARBITRARY_OID, len(content)) def test_verify_object_wrong_size(self, storage_backend): - content = b'The contents of a file-like object' - storage_backend.put('org/repo', ARBITRARY_OID, io.BytesIO(content)) - assert not storage_backend.verify_object('org/repo', ARBITRARY_OID, len(content) + 2) + content = b"The contents of a file-like object" + storage_backend.put("org/repo", ARBITRARY_OID, io.BytesIO(content)) + assert not storage_backend.verify_object( + "org/repo", ARBITRARY_OID, len(content) + 2 + ) def test_verify_object_not_there(self, storage_backend): - assert not storage_backend.verify_object('org/repo', ARBITRARY_OID, 0) + assert not storage_backend.verify_object("org/repo", ARBITRARY_OID, 0) -class StreamingStorageAbstractTests(_CommonStorageAbstractTests, _VerifiableStorageAbstractTests): +class StreamingStorageAbstractTests( + _CommonStorageAbstractTests, _VerifiableStorageAbstractTests +): """Mixin for testing the StreamingStorage methods of a backend that implements StreamingStorage To use, create a concrete test class mixing this class in, and define a fixture named @@ -66,40 +68,45 @@ class StreamingStorageAbstractTests(_CommonStorageAbstractTests, _VerifiableStor """ def test_put_get_object(self, storage_backend: StreamingStorage): - """Test a full put-then-get cycle - 
""" - content = b'The contents of a file-like object' - written = storage_backend.put('org/repo', ARBITRARY_OID, io.BytesIO(content)) + """Test a full put-then-get cycle""" + content = b"The contents of a file-like object" + written = storage_backend.put("org/repo", ARBITRARY_OID, io.BytesIO(content)) assert len(content) == written - fetched = storage_backend.get('org/repo', ARBITRARY_OID) - fetched_content = b''.join(fetched) + fetched = storage_backend.get("org/repo", ARBITRARY_OID) + fetched_content = b"".join(fetched) assert content == fetched_content def test_get_raises_if_not_found(self, storage_backend: StreamingStorage): - """Test that calling get for a non-existing object raises - """ + """Test that calling get for a non-existing object raises""" with pytest.raises(ObjectNotFound): - storage_backend.get('org/repo', ARBITRARY_OID) + storage_backend.get("org/repo", ARBITRARY_OID) -class ExternalStorageAbstractTests(_CommonStorageAbstractTests, _VerifiableStorageAbstractTests): +class ExternalStorageAbstractTests( + _CommonStorageAbstractTests, _VerifiableStorageAbstractTests +): """Mixin for testing the ExternalStorage methods of a backend that implements ExternalStorage To use, create a concrete test class mixing this class in, and define a fixture named ``storage_backend`` that returns an appropriate storage backend object. 
""" + def test_get_upload_action(self, storage_backend: ExternalStorage): - action_spec = storage_backend.get_upload_action('org/repo', ARBITRARY_OID, 100, 3600) - upload = action_spec['actions']['upload'] - assert upload['href'][0:4] == 'http' - assert upload['expires_in'] == 3600 + action_spec = storage_backend.get_upload_action( + "org/repo", ARBITRARY_OID, 100, 3600 + ) + upload = action_spec["actions"]["upload"] + assert upload["href"][0:4] == "http" + assert upload["expires_in"] == 3600 return upload def test_get_download_action(self, storage_backend): - action_spec = storage_backend.get_download_action('org/repo', ARBITRARY_OID, 100, 7200) - download = action_spec['actions']['download'] - assert download['href'][0:4] == 'http' - assert download['expires_in'] == 7200 + action_spec = storage_backend.get_download_action( + "org/repo", ARBITRARY_OID, 100, 7200 + ) + download = action_spec["actions"]["download"] + assert download["href"][0:4] == "http" + assert download["expires_in"] == 7200 return download diff --git a/tests/storage/test_amazon_s3.py b/tests/storage/test_amazon_s3.py index 3b3952c..70480ae 100644 --- a/tests/storage/test_amazon_s3.py +++ b/tests/storage/test_amazon_s3.py @@ -12,7 +12,7 @@ from . import ARBITRARY_OID, ExternalStorageAbstractTests, StreamingStorageAbstractTests -TEST_AWS_S3_BUCKET_NAME = 'test-giftless' +TEST_AWS_S3_BUCKET_NAME = "test-giftless" @pytest.fixture() @@ -29,7 +29,7 @@ def storage_backend() -> Generator[AmazonS3Storage, None, None]: If these variables are not set, and pytest-vcr is not in use, the tests *will* fail. 
""" - prefix = 'giftless-tests' + prefix = "giftless-tests" # We use a live S3 bucket to test storage = AmazonS3Storage(bucket_name=TEST_AWS_S3_BUCKET_NAME, path_prefix=prefix) @@ -43,31 +43,32 @@ def storage_backend() -> Generator[AmazonS3Storage, None, None]: raise pytest.PytestWarning("Could not clean up after test: {}".format(e)) -@pytest.fixture(scope='module') +@pytest.fixture(scope="module") def vcr_config(): - live_tests = bool(os.environ.get('AWS_ACCESS_KEY_ID') and - os.environ.get('AWS_SECRET_ACCESS_KEY')) + live_tests = bool( + os.environ.get("AWS_ACCESS_KEY_ID") and os.environ.get("AWS_SECRET_ACCESS_KEY") + ) if live_tests: - mode = 'once' + mode = "once" else: - os.environ['AWS_ACCESS_KEY_ID'] = 'fake' - os.environ['AWS_SECRET_ACCESS_KEY'] = 'fake' - os.environ['AWS_DEFAULT_REGION'] = 'us-east-1' - mode = 'none' + os.environ["AWS_ACCESS_KEY_ID"] = "fake" + os.environ["AWS_SECRET_ACCESS_KEY"] = "fake" + os.environ["AWS_DEFAULT_REGION"] = "us-east-1" + mode = "none" return { - "filter_headers": [ - ('authorization', 'fake-authz-header') - ], - "record_mode": mode + "filter_headers": [("authorization", "fake-authz-header")], + "record_mode": mode, } @pytest.mark.vcr() -class TestAmazonS3StorageBackend(StreamingStorageAbstractTests, ExternalStorageAbstractTests): +class TestAmazonS3StorageBackend( + StreamingStorageAbstractTests, ExternalStorageAbstractTests +): def test_get_upload_action(self, storage_backend: ExternalStorage): upload = super().test_get_upload_action(storage_backend) - assert upload['header']['Content-Type'] == 'application/octet-stream' + assert upload["header"]["Content-Type"] == "application/octet-stream" - b64_oid = upload['header']['x-amz-checksum-sha256'] + b64_oid = upload["header"]["x-amz-checksum-sha256"] assert b64decode(b64_oid) == unhexlify(ARBITRARY_OID) diff --git a/tests/storage/test_azure.py b/tests/storage/test_azure.py index 54ab5a1..a1fcfad 100644 --- a/tests/storage/test_azure.py +++ b/tests/storage/test_azure.py @@ 
-11,8 +11,8 @@ from . import ExternalStorageAbstractTests, StreamingStorageAbstractTests -MOCK_AZURE_ACCOUNT_NAME = 'my-account' -MOCK_AZURE_CONTAINER_NAME = 'my-container' +MOCK_AZURE_ACCOUNT_NAME = "my-account" +MOCK_AZURE_CONTAINER_NAME = "my-container" @pytest.fixture() @@ -24,13 +24,15 @@ def storage_backend() -> Generator[AzureBlobsStorage, None, None]: If these variables are not set, and pytest-vcr is not in use, the tests *will* fail. """ - connection_str = os.environ.get('AZURE_CONNECTION_STRING') - container_name = os.environ.get('AZURE_CONTAINER') - prefix = 'giftless-tests' + connection_str = os.environ.get("AZURE_CONNECTION_STRING") + container_name = os.environ.get("AZURE_CONTAINER") + prefix = "giftless-tests" if container_name and connection_str: # We use a live Azure container to test - client: BlobServiceClient = BlobServiceClient.from_connection_string(connection_str) + client: BlobServiceClient = BlobServiceClient.from_connection_string( + connection_str + ) try: yield AzureBlobsStorage(connection_str, container_name, path_prefix=prefix) finally: @@ -41,26 +43,32 @@ def storage_backend() -> Generator[AzureBlobsStorage, None, None]: except AzureError: pass else: - connection_str = f'DefaultEndpointsProtocol=https;AccountName={MOCK_AZURE_ACCOUNT_NAME};' \ - 'AccountKey=U29tZVJhbmRvbUNyYXBIZXJlCg==;EndpointSuffix=core.windows.net' - yield AzureBlobsStorage(connection_str, MOCK_AZURE_CONTAINER_NAME, path_prefix=prefix) + connection_str = ( + f"DefaultEndpointsProtocol=https;AccountName={MOCK_AZURE_ACCOUNT_NAME};" + "AccountKey=U29tZVJhbmRvbUNyYXBIZXJlCg==;EndpointSuffix=core.windows.net" + ) + yield AzureBlobsStorage( + connection_str, MOCK_AZURE_CONTAINER_NAME, path_prefix=prefix + ) -@pytest.fixture(scope='module') +@pytest.fixture(scope="module") def vcr_config(): - live_tests = bool(os.environ.get('AZURE_CONNECTION_STRING') and os.environ.get('AZURE_CONTAINER')) + live_tests = bool( + os.environ.get("AZURE_CONNECTION_STRING") and 
os.environ.get("AZURE_CONTAINER") + ) if live_tests: - mode = 'once' + mode = "once" else: - mode = 'none' + mode = "none" return { - "filter_headers": [ - ('authorization', 'fake-authz-header') - ], - "record_mode": mode + "filter_headers": [("authorization", "fake-authz-header")], + "record_mode": mode, } @pytest.mark.vcr() -class TestAzureBlobStorageBackend(StreamingStorageAbstractTests, ExternalStorageAbstractTests): +class TestAzureBlobStorageBackend( + StreamingStorageAbstractTests, ExternalStorageAbstractTests +): pass diff --git a/tests/storage/test_google_cloud.py b/tests/storage/test_google_cloud.py index aff4d63..cd78e20 100644 --- a/tests/storage/test_google_cloud.py +++ b/tests/storage/test_google_cloud.py @@ -8,51 +8,52 @@ from giftless.storage.google_cloud import GoogleCloudStorage -from . import ExternalStorageAbstractTests, StreamingStorageAbstractTests -MOCK_GCP_PROJECT_NAME = 'giftless-tests' -MOCK_GCP_BUCKET_NAME = 'giftless-tests-20200818' +MOCK_GCP_PROJECT_NAME = "giftless-tests" +MOCK_GCP_BUCKET_NAME = "giftless-tests-20200818" # This is a valid but revoked key that we use in testing -MOCK_GCP_KEY_B64 = ("ewogICJ0eXBlIjogInNlcnZpY2VfYWNjb3VudCIsCiAgInByb2plY3RfaWQiOiAiZ2lmdGxlc3MtdGVz" - "dHMiLAogICJwcml2YXRlX2tleV9pZCI6ICI4MWRhNDcxNzhiYzhmYjE1MDU1NTg3OWRjZTczZThmZDlm" - "OWI4NmJkIiwKICAicHJpdmF0ZV9rZXkiOiAiLS0tLS1CRUdJTiBQUklWQVRFIEtFWS0tLS0tXG5NSUlF" - "dkFJQkFEQU5CZ2txaGtpRzl3MEJBUUVGQUFTQ0JLWXdnZ1NpQWdFQUFvSUJBUUNsYXdDOUEvZHBnbVJW" - "XG5kYVg2UW5xY1N6YW5ueTdCVlgwVklwUHVjNzl2aFR2NWRwZXRaa29SQmV6Uzg2ZStHUHVyTmJIMU9r" - "WEZrL2tkXG5SNHFqMDV6SXlYeWxiQUVxSk1BV24zZFY0VUVRVFlmRitPY0ltZUxpcjR3cW9pTldDZDNJ" - "aHErNHVVeU1WRDMxXG5wc1FlcWVxcWV6bVoyNG1oTjBLK2NQczNuSXlIK0lzZXFsWjJob3U3bUU3U2Js" - "YXdjc04ramcyNmQ5YzFUZlpoXG42eFozVkpndGFtcUZvdlZmbEZwNFVvLy9tVGo0cXEwUWRUYk9SS1NE" - "eVkxTWhkQ24veSsyaForVm9IUitFM0Z4XG5XRmc2VGFwRGJhc29heEp5YjRoZEFFK0JhbW14bklTL09G" - "bElaMGVoL2tsRmlBTlJRMEpQb2dXRjFjVE9NcVFxXG4wMlVFV2V5ckFnTUJBQUVDZ2dFQUJNOE5odGVQ" - 
"NElhTEUxd2haclN0cEp5NWltMGgxenFXTVlCTU85WDR4KzJUXG5PZmRUYStLbWtpcUV1c1UyanNJdks1" - "VUJPakVBcncxVU1RYnBaaEtoTDhub2c3dGkyNjVoMG1Ba1pzWlZOWHU0XG5UKzQ4REZ4YzQ4THlzaktX" - "M1RCQVBSb2RRbkJLTVA3MnY4QThKMU5BYlMwZ3IvTW1TbEVidm1tT2FuTU9ONXAwXG43djlscm9GMzFO" - "akMzT05OY25pUlRYY01xT2tEbWt5LyszeVc2RldMMkJZV3RwcGN3L0s1TnYxdGNMTG5iajVhXG5Hc3dV" - "MENtQXgyTEVoWEo0bndJaWlFR3h6UGZYVXNLcVhLL2JEZENKbDUzMTgraU9aSHNrdXR1OFlqQVpsdktp" - "XG5yckNFUkFXZitLeTZ0WGhnKzJIRzJJbUc5cG8wRnUwTGlIU0ZVUURKY1FLQmdRRFQ5RDJEYm9SNWFG" - "WW0wQlVSXG5vNGd4OHZGc0NyTEx0a09EZGx3U2wrT20yblFvY0JXSTEyTmF5QXRlL2xhVFZNRlorVks1" - "bU9vYXl2WnljTU1YXG5SdXZJYmdCTFdHYkdwSXdXZnlDOGxRZEJYM09xZTZZSzZTMUU2VnNYUVN0aHQ0" - "YUx3ZGpGQ2J6VU1lc1ZzREV5XG5FYU85aXlTUVlFTmFTN2V3amFzNUFVU1F0d0tCZ1FESHl4WUp3bWxp" - "QzE4NEVyZ3lZSEFwYm9weXQzSVkzVGFKXG5yV2MrSGw5WDNzVEJzaFVQYy85SmhjanZKYWVzMlhrcEEw" - "YmY5cis1MEcxUndua3dMWHZsbDJSU0FBNE05TG4rXG45cVlsNEFXNU9QVTVJS0tKYVk1c0kzSHdXTXd6" - "elRya3FBV3hNallJME9OSnBaWUVnSTVKN09sek1jYnhLREZxXG51MmpjYkFubnJRS0JnRlUxaklGSkxm" - "TE5FazE2Tys0aWF6K0Jack5EdmN1TjA2aUhMYzYveDJLdDBpTHJwSXlsXG40cWg5WWF6bjNSQlA4NGRq" - "WjNGNzJ5bTRUTW1ITWJjcTZPRmo3N1JhcnI3UEtnNWxQMWp4Sk1DUVNpVFFudGttXG5FdS93VEpHVnZv" - "WURUUkRrZG13SVZTU05pTy9vTEc3dmpuOUY4QVltM1F6eEFjRDF3MDhnaGxzVEFvR0FidUthXG4vNTJq" - "eVdPUVhGbWZXMjVFc2VvRTh2ZzNYZTlnZG5jRUJ1anFkNlZPeEVYbkJHV1h1U0dFVEo0MGVtMVVubHVR" - "XG5PWHNFRzhlKzlKS2ZtZ3FVYWU5bElWR2dlclpVaUZveUNuRlVHK0d0MEIvNXRaUWRGSTF6ampacVZ4" - "Ry9idXFHXG5CanRjMi9XN1A4T2tDQ21sVHdncTVPRXFqZXVGeWJ2cnpmSTBhUjBDZ1lCdVlYWm5MMm1x" - "eVNma0FnaGswRVVmXG5XeElDb1FmRDdCQlJBV3lmL3VwRjQ2NlMvRmhONUVreG5vdkZ2RlZyQjU1SHVH" - "RTh2Qk4vTEZNVXlPU0xXQ0lIXG5RUG9ZcytNM0NLdGJWTXMxY1h2Tm5tZFRhMnRyYjQ0SlQ5ZlFLbkVw" - "a2VsbUdPdXJMNEVMdmFyUEFyR0x4VllTXG5jWFo1a1FBUy9GeGhFSDZSbnFSalFnPT1cbi0tLS0tRU5E" - "IFBSSVZBVEUgS0VZLS0tLS1cbiIsCiAgImNsaWVudF9lbWFpbCI6ICJzb21lLXNlcnZpY2UtYWNjb3Vu" - "dEBnaWZ0bGVzcy10ZXN0cy5pYW0uZ3NlcnZpY2VhY2NvdW50LmNvbSIsCiAgImNsaWVudF9pZCI6ICIx" - 
"MDk4NTYwMjgzNDI5MDI4ODI3MTUiLAogICJhdXRoX3VyaSI6ICJodHRwczovL2FjY291bnRzLmdvb2ds" - "ZS5jb20vby9vYXV0aDIvYXV0aCIsCiAgInRva2VuX3VyaSI6ICJodHRwczovL29hdXRoMi5nb29nbGVh" - "cGlzLmNvbS90b2tlbiIsCiAgImF1dGhfcHJvdmlkZXJfeDUwOV9jZXJ0X3VybCI6ICJodHRwczovL3d3" - "dy5nb29nbGVhcGlzLmNvbS9vYXV0aDIvdjEvY2VydHMiLAogICJjbGllbnRfeDUwOV9jZXJ0X3VybCI6" - "ICJodHRwczovL3d3dy5nb29nbGVhcGlzLmNvbS9yb2JvdC92MS9tZXRhZGF0YS94NTA5L3NvbWUtc2Vy" - "dmljZS1hY2NvdW50JTQwZ2lmdGxlc3MtdGVzdHMuaWFtLmdzZXJ2aWNlYWNjb3VudC5jb20iCn0K") +MOCK_GCP_KEY_B64 = ( + "ewogICJ0eXBlIjogInNlcnZpY2VfYWNjb3VudCIsCiAgInByb2plY3RfaWQiOiAiZ2lmdGxlc3MtdGVz" + "dHMiLAogICJwcml2YXRlX2tleV9pZCI6ICI4MWRhNDcxNzhiYzhmYjE1MDU1NTg3OWRjZTczZThmZDlm" + "OWI4NmJkIiwKICAicHJpdmF0ZV9rZXkiOiAiLS0tLS1CRUdJTiBQUklWQVRFIEtFWS0tLS0tXG5NSUlF" + "dkFJQkFEQU5CZ2txaGtpRzl3MEJBUUVGQUFTQ0JLWXdnZ1NpQWdFQUFvSUJBUUNsYXdDOUEvZHBnbVJW" + "XG5kYVg2UW5xY1N6YW5ueTdCVlgwVklwUHVjNzl2aFR2NWRwZXRaa29SQmV6Uzg2ZStHUHVyTmJIMU9r" + "WEZrL2tkXG5SNHFqMDV6SXlYeWxiQUVxSk1BV24zZFY0VUVRVFlmRitPY0ltZUxpcjR3cW9pTldDZDNJ" + "aHErNHVVeU1WRDMxXG5wc1FlcWVxcWV6bVoyNG1oTjBLK2NQczNuSXlIK0lzZXFsWjJob3U3bUU3U2Js" + "YXdjc04ramcyNmQ5YzFUZlpoXG42eFozVkpndGFtcUZvdlZmbEZwNFVvLy9tVGo0cXEwUWRUYk9SS1NE" + "eVkxTWhkQ24veSsyaForVm9IUitFM0Z4XG5XRmc2VGFwRGJhc29heEp5YjRoZEFFK0JhbW14bklTL09G" + "bElaMGVoL2tsRmlBTlJRMEpQb2dXRjFjVE9NcVFxXG4wMlVFV2V5ckFnTUJBQUVDZ2dFQUJNOE5odGVQ" + "NElhTEUxd2haclN0cEp5NWltMGgxenFXTVlCTU85WDR4KzJUXG5PZmRUYStLbWtpcUV1c1UyanNJdks1" + "VUJPakVBcncxVU1RYnBaaEtoTDhub2c3dGkyNjVoMG1Ba1pzWlZOWHU0XG5UKzQ4REZ4YzQ4THlzaktX" + "M1RCQVBSb2RRbkJLTVA3MnY4QThKMU5BYlMwZ3IvTW1TbEVidm1tT2FuTU9ONXAwXG43djlscm9GMzFO" + "akMzT05OY25pUlRYY01xT2tEbWt5LyszeVc2RldMMkJZV3RwcGN3L0s1TnYxdGNMTG5iajVhXG5Hc3dV" + "MENtQXgyTEVoWEo0bndJaWlFR3h6UGZYVXNLcVhLL2JEZENKbDUzMTgraU9aSHNrdXR1OFlqQVpsdktp" + "XG5yckNFUkFXZitLeTZ0WGhnKzJIRzJJbUc5cG8wRnUwTGlIU0ZVUURKY1FLQmdRRFQ5RDJEYm9SNWFG" + "WW0wQlVSXG5vNGd4OHZGc0NyTEx0a09EZGx3U2wrT20yblFvY0JXSTEyTmF5QXRlL2xhVFZNRlorVks1" + 
"bU9vYXl2WnljTU1YXG5SdXZJYmdCTFdHYkdwSXdXZnlDOGxRZEJYM09xZTZZSzZTMUU2VnNYUVN0aHQ0" + "YUx3ZGpGQ2J6VU1lc1ZzREV5XG5FYU85aXlTUVlFTmFTN2V3amFzNUFVU1F0d0tCZ1FESHl4WUp3bWxp" + "QzE4NEVyZ3lZSEFwYm9weXQzSVkzVGFKXG5yV2MrSGw5WDNzVEJzaFVQYy85SmhjanZKYWVzMlhrcEEw" + "YmY5cis1MEcxUndua3dMWHZsbDJSU0FBNE05TG4rXG45cVlsNEFXNU9QVTVJS0tKYVk1c0kzSHdXTXd6" + "elRya3FBV3hNallJME9OSnBaWUVnSTVKN09sek1jYnhLREZxXG51MmpjYkFubnJRS0JnRlUxaklGSkxm" + "TE5FazE2Tys0aWF6K0Jack5EdmN1TjA2aUhMYzYveDJLdDBpTHJwSXlsXG40cWg5WWF6bjNSQlA4NGRq" + "WjNGNzJ5bTRUTW1ITWJjcTZPRmo3N1JhcnI3UEtnNWxQMWp4Sk1DUVNpVFFudGttXG5FdS93VEpHVnZv" + "WURUUkRrZG13SVZTU05pTy9vTEc3dmpuOUY4QVltM1F6eEFjRDF3MDhnaGxzVEFvR0FidUthXG4vNTJq" + "eVdPUVhGbWZXMjVFc2VvRTh2ZzNYZTlnZG5jRUJ1anFkNlZPeEVYbkJHV1h1U0dFVEo0MGVtMVVubHVR" + "XG5PWHNFRzhlKzlKS2ZtZ3FVYWU5bElWR2dlclpVaUZveUNuRlVHK0d0MEIvNXRaUWRGSTF6ampacVZ4" + "Ry9idXFHXG5CanRjMi9XN1A4T2tDQ21sVHdncTVPRXFqZXVGeWJ2cnpmSTBhUjBDZ1lCdVlYWm5MMm1x" + "eVNma0FnaGswRVVmXG5XeElDb1FmRDdCQlJBV3lmL3VwRjQ2NlMvRmhONUVreG5vdkZ2RlZyQjU1SHVH" + "RTh2Qk4vTEZNVXlPU0xXQ0lIXG5RUG9ZcytNM0NLdGJWTXMxY1h2Tm5tZFRhMnRyYjQ0SlQ5ZlFLbkVw" + "a2VsbUdPdXJMNEVMdmFyUEFyR0x4VllTXG5jWFo1a1FBUy9GeGhFSDZSbnFSalFnPT1cbi0tLS0tRU5E" + "IFBSSVZBVEUgS0VZLS0tLS1cbiIsCiAgImNsaWVudF9lbWFpbCI6ICJzb21lLXNlcnZpY2UtYWNjb3Vu" + "dEBnaWZ0bGVzcy10ZXN0cy5pYW0uZ3NlcnZpY2VhY2NvdW50LmNvbSIsCiAgImNsaWVudF9pZCI6ICIx" + "MDk4NTYwMjgzNDI5MDI4ODI3MTUiLAogICJhdXRoX3VyaSI6ICJodHRwczovL2FjY291bnRzLmdvb2ds" + "ZS5jb20vby9vYXV0aDIvYXV0aCIsCiAgInRva2VuX3VyaSI6ICJodHRwczovL29hdXRoMi5nb29nbGVh" + "cGlzLmNvbS90b2tlbiIsCiAgImF1dGhfcHJvdmlkZXJfeDUwOV9jZXJ0X3VybCI6ICJodHRwczovL3d3" + "dy5nb29nbGVhcGlzLmNvbS9vYXV0aDIvdjEvY2VydHMiLAogICJjbGllbnRfeDUwOV9jZXJ0X3VybCI6" + "ICJodHRwczovL3d3dy5nb29nbGVhcGlzLmNvbS9yb2JvdC92MS9tZXRhZGF0YS94NTA5L3NvbWUtc2Vy" + "dmljZS1hY2NvdW50JTQwZ2lmdGxlc3MtdGVzdHMuaWFtLmdzZXJ2aWNlYWNjb3VudC5jb20iCn0K" +) @pytest.fixture() @@ -66,43 +67,53 @@ def storage_backend() -> Generator[GoogleCloudStorage, None, None]: If these variables are not 
set, and pytest-vcr is not in use, the tests *will* fail. """ - account_key_file = os.environ.get('GCP_ACCOUNT_KEY_FILE') - project_name = os.environ.get('GCP_PROJECT_NAME') - bucket_name = os.environ.get('GCP_BUCKET_NAME') - prefix = 'giftless-tests' + account_key_file = os.environ.get("GCP_ACCOUNT_KEY_FILE") + project_name = os.environ.get("GCP_PROJECT_NAME") + bucket_name = os.environ.get("GCP_BUCKET_NAME") + prefix = "giftless-tests" if account_key_file and project_name and bucket_name: # We use a live GCS bucket to test - storage = GoogleCloudStorage(project_name=project_name, bucket_name=bucket_name, - account_key_file=account_key_file, path_prefix=prefix) + storage = GoogleCloudStorage( + project_name=project_name, + bucket_name=bucket_name, + account_key_file=account_key_file, + path_prefix=prefix, + ) try: yield storage finally: bucket = storage.storage_client.bucket(bucket_name) try: - blobs = bucket.list_blobs(prefix=prefix + '/') + blobs = bucket.list_blobs(prefix=prefix + "/") bucket.delete_blobs(blobs) except GoogleAPIError as e: - raise pytest.PytestWarning("Could not clean up after test: {}".format(e)) + raise pytest.PytestWarning( + "Could not clean up after test: {}".format(e) + ) else: - yield GoogleCloudStorage(project_name=MOCK_GCP_PROJECT_NAME, bucket_name=MOCK_GCP_BUCKET_NAME, - account_key_base64=MOCK_GCP_KEY_B64, path_prefix=prefix) + yield GoogleCloudStorage( + project_name=MOCK_GCP_PROJECT_NAME, + bucket_name=MOCK_GCP_BUCKET_NAME, + account_key_base64=MOCK_GCP_KEY_B64, + path_prefix=prefix, + ) -@pytest.fixture(scope='module') +@pytest.fixture(scope="module") def vcr_config(): - live_tests = bool(os.environ.get('GCP_ACCOUNT_KEY_FILE') and - os.environ.get('GCP_PROJECT_NAME') and - os.environ.get('GCP_BUCKET_NAME')) + live_tests = bool( + os.environ.get("GCP_ACCOUNT_KEY_FILE") + and os.environ.get("GCP_PROJECT_NAME") + and os.environ.get("GCP_BUCKET_NAME") + ) if live_tests: - mode = 'once' + mode = "once" else: - mode = 'none' + mode = 
"none" return { - "filter_headers": [ - ('authorization', 'fake-authz-header') - ], - "record_mode": mode + "filter_headers": [("authorization", "fake-authz-header")], + "record_mode": mode, } diff --git a/tests/storage/test_local.py b/tests/storage/test_local.py index 9311b10..a73e75a 100644 --- a/tests/storage/test_local.py +++ b/tests/storage/test_local.py @@ -14,11 +14,10 @@ @pytest.fixture() def storage_dir(tmp_path) -> Generator[pathlib.Path, None, None]: - """Create a unique temp dir for testing storage - """ + """Create a unique temp dir for testing storage""" dir = None try: - dir = tmp_path / 'giftless_tests' + dir = tmp_path / "giftless_tests" dir.mkdir(parents=True) yield dir finally: @@ -33,11 +32,9 @@ def storage_backend(storage_dir) -> LocalStorage: class TestLocalStorageBackend(StreamingStorageAbstractTests): - def test_local_path_created_on_init(self, storage_dir: pathlib.Path): - """Test that the local storage path is created on module init - """ - storage_path = str(storage_dir / 'here') + """Test that the local storage path is created on module init""" + storage_path = str(storage_dir / "here") assert not os.path.exists(storage_path) LocalStorage(path=storage_path) assert os.path.exists(storage_path) diff --git a/tests/test_batch_api.py b/tests/test_batch_api.py index 4cbc275..9b37fe3 100644 --- a/tests/test_batch_api.py +++ b/tests/test_batch_api.py @@ -5,184 +5,166 @@ from .helpers import batch_request_payload, create_file_in_storage -@pytest.mark.usefixtures('authz_full_access') +@pytest.mark.usefixtures("authz_full_access") def test_upload_batch_request(test_client): - """Test basic batch API with a basic successful upload request - """ - request_payload = batch_request_payload(operation='upload') - response = test_client.post('/myorg/myrepo/objects/batch', - json=request_payload) + """Test basic batch API with a basic successful upload request""" + request_payload = batch_request_payload(operation="upload") + response = 
test_client.post("/myorg/myrepo/objects/batch", json=request_payload) assert 200 == response.status_code - assert 'application/vnd.git-lfs+json' == response.content_type + assert "application/vnd.git-lfs+json" == response.content_type payload = response.json - assert 'message' not in payload - assert payload['transfer'] == 'basic' - assert len(payload['objects']) == 1 + assert "message" not in payload + assert payload["transfer"] == "basic" + assert len(payload["objects"]) == 1 - object = payload['objects'][0] - assert object['oid'] == request_payload['objects'][0]['oid'] - assert object['size'] == request_payload['objects'][0]['size'] - assert len(object['actions']) == 2 - assert 'upload' in object['actions'] - assert 'verify' in object['actions'] + object = payload["objects"][0] + assert object["oid"] == request_payload["objects"][0]["oid"] + assert object["size"] == request_payload["objects"][0]["size"] + assert len(object["actions"]) == 2 + assert "upload" in object["actions"] + assert "verify" in object["actions"] def test_download_batch_request(test_client, storage_path): - """Test basic batch API with a basic successful upload request - """ - request_payload = batch_request_payload(operation='download') - oid = request_payload['objects'][0]['oid'] - create_file_in_storage(storage_path, 'myorg', 'myrepo', oid, size=8) + """Test basic batch API with a basic successful upload request""" + request_payload = batch_request_payload(operation="download") + oid = request_payload["objects"][0]["oid"] + create_file_in_storage(storage_path, "myorg", "myrepo", oid, size=8) - response = test_client.post('/myorg/myrepo/objects/batch', - json=request_payload) + response = test_client.post("/myorg/myrepo/objects/batch", json=request_payload) assert 200 == response.status_code - assert 'application/vnd.git-lfs+json' == response.content_type + assert "application/vnd.git-lfs+json" == response.content_type payload = response.json - assert 'message' not in payload - assert 
payload['transfer'] == 'basic' - assert len(payload['objects']) == 1 + assert "message" not in payload + assert payload["transfer"] == "basic" + assert len(payload["objects"]) == 1 - object = payload['objects'][0] - assert object['oid'] == request_payload['objects'][0]['oid'] - assert object['size'] == request_payload['objects'][0]['size'] - assert len(object['actions']) == 1 - assert 'download' in object['actions'] + object = payload["objects"][0] + assert object["oid"] == request_payload["objects"][0]["oid"] + assert object["size"] == request_payload["objects"][0]["size"] + assert len(object["actions"]) == 1 + assert "download" in object["actions"] def test_download_batch_request_two_files_one_missing(test_client, storage_path): - """Test batch API with a two object download request where one file 404 - """ - request_payload = batch_request_payload(operation='download') - oid = request_payload['objects'][0]['oid'] - create_file_in_storage(storage_path, 'myorg', 'myrepo', oid, size=8) + """Test batch API with a two object download request where one file 404""" + request_payload = batch_request_payload(operation="download") + oid = request_payload["objects"][0]["oid"] + create_file_in_storage(storage_path, "myorg", "myrepo", oid, size=8) # Add a 2nd, non existing object - request_payload['objects'].append({ - "oid": "12345679", - "size": 5555 - }) + request_payload["objects"].append({"oid": "12345679", "size": 5555}) - response = test_client.post('/myorg/myrepo/objects/batch', - json=request_payload) + response = test_client.post("/myorg/myrepo/objects/batch", json=request_payload) assert 200 == response.status_code - assert 'application/vnd.git-lfs+json' == response.content_type + assert "application/vnd.git-lfs+json" == response.content_type payload = response.json - assert 'message' not in payload - assert payload['transfer'] == 'basic' - assert len(payload['objects']) == 2 + assert "message" not in payload + assert payload["transfer"] == "basic" + assert 
len(payload["objects"]) == 2 - object = payload['objects'][0] - assert object['oid'] == request_payload['objects'][0]['oid'] - assert object['size'] == request_payload['objects'][0]['size'] - assert len(object['actions']) == 1 - assert 'download' in object['actions'] + object = payload["objects"][0] + assert object["oid"] == request_payload["objects"][0]["oid"] + assert object["size"] == request_payload["objects"][0]["size"] + assert len(object["actions"]) == 1 + assert "download" in object["actions"] - object = payload['objects'][1] - assert object['oid'] == request_payload['objects'][1]['oid'] - assert object['size'] == request_payload['objects'][1]['size'] - assert 'actions' not in object - assert object['error']['code'] == 404 + object = payload["objects"][1] + assert object["oid"] == request_payload["objects"][1]["oid"] + assert object["size"] == request_payload["objects"][1]["size"] + assert "actions" not in object + assert object["error"]["code"] == 404 def test_download_batch_request_two_files_missing(test_client): - """Test batch API with a two object download request where one file 404 - """ - request_payload = batch_request_payload(operation='download') - request_payload['objects'].append({ - "oid": "12345679", - "size": 5555 - }) + """Test batch API with a two object download request where one file 404""" + request_payload = batch_request_payload(operation="download") + request_payload["objects"].append({"oid": "12345679", "size": 5555}) - response = test_client.post('/myorg/myrepo/objects/batch', - json=request_payload) + response = test_client.post("/myorg/myrepo/objects/batch", json=request_payload) assert 404 == response.status_code - assert 'application/vnd.git-lfs+json' == response.content_type + assert "application/vnd.git-lfs+json" == response.content_type payload = response.json - assert 'message' in payload - assert 'objects' not in payload - assert 'transfer' not in payload + assert "message" in payload + assert "objects" not in payload + 
assert "transfer" not in payload def test_download_batch_request_two_files_one_mismatch(test_client, storage_path): - """Test batch API with a two object download request where one file 422 - """ - request_payload = batch_request_payload(operation='download') - request_payload['objects'].append({ - "oid": "12345679", - "size": 8 - }) + """Test batch API with a two object download request where one file 422""" + request_payload = batch_request_payload(operation="download") + request_payload["objects"].append({"oid": "12345679", "size": 8}) - create_file_in_storage(storage_path, 'myorg', 'myrepo', request_payload['objects'][0]['oid'], size=8) - create_file_in_storage(storage_path, 'myorg', 'myrepo', request_payload['objects'][1]['oid'], size=9) + create_file_in_storage( + storage_path, "myorg", "myrepo", request_payload["objects"][0]["oid"], size=8 + ) + create_file_in_storage( + storage_path, "myorg", "myrepo", request_payload["objects"][1]["oid"], size=9 + ) - response = test_client.post('/myorg/myrepo/objects/batch', - json=request_payload) + response = test_client.post("/myorg/myrepo/objects/batch", json=request_payload) assert 200 == response.status_code - assert 'application/vnd.git-lfs+json' == response.content_type + assert "application/vnd.git-lfs+json" == response.content_type payload = response.json - assert 'message' not in payload - assert payload['transfer'] == 'basic' - assert len(payload['objects']) == 2 + assert "message" not in payload + assert payload["transfer"] == "basic" + assert len(payload["objects"]) == 2 - object = payload['objects'][0] - assert object['oid'] == request_payload['objects'][0]['oid'] - assert object['size'] == request_payload['objects'][0]['size'] - assert len(object['actions']) == 1 - assert 'download' in object['actions'] + object = payload["objects"][0] + assert object["oid"] == request_payload["objects"][0]["oid"] + assert object["size"] == request_payload["objects"][0]["size"] + assert len(object["actions"]) == 1 + assert 
"download" in object["actions"] - object = payload['objects'][1] - assert object['oid'] == request_payload['objects'][1]['oid'] - assert object['size'] == request_payload['objects'][1]['size'] - assert 'actions' not in object - assert object['error']['code'] == 422 + object = payload["objects"][1] + assert object["oid"] == request_payload["objects"][1]["oid"] + assert object["size"] == request_payload["objects"][1]["size"] + assert "actions" not in object + assert object["error"]["code"] == 422 def test_download_batch_request_one_file_mismatch(test_client, storage_path): - """Test batch API with a two object download request where one file 422 - """ - request_payload = batch_request_payload(operation='download') - create_file_in_storage(storage_path, 'myorg', 'myrepo', request_payload['objects'][0]['oid'], size=9) + """Test batch API with a two object download request where one file 422""" + request_payload = batch_request_payload(operation="download") + create_file_in_storage( + storage_path, "myorg", "myrepo", request_payload["objects"][0]["oid"], size=9 + ) - response = test_client.post('/myorg/myrepo/objects/batch', - json=request_payload) + response = test_client.post("/myorg/myrepo/objects/batch", json=request_payload) assert 422 == response.status_code - assert 'application/vnd.git-lfs+json' == response.content_type + assert "application/vnd.git-lfs+json" == response.content_type payload = response.json - assert 'message' in payload - assert 'objects' not in payload - assert 'transfer' not in payload + assert "message" in payload + assert "objects" not in payload + assert "transfer" not in payload def test_download_batch_request_two_files_different_errors(test_client, storage_path): - """Test batch API with a two object download request where one file is missing and one is mismatch - """ - request_payload = batch_request_payload(operation='download') - request_payload['objects'].append({ - "oid": "12345679", - "size": 8 - }) - 
create_file_in_storage(storage_path, 'myorg', 'myrepo', request_payload['objects'][0]['oid'], size=9) - - response = test_client.post('/myorg/myrepo/objects/batch', - json=request_payload) + """Test batch API with a two object download request where one file is missing and one is mismatch""" + request_payload = batch_request_payload(operation="download") + request_payload["objects"].append({"oid": "12345679", "size": 8}) + create_file_in_storage( + storage_path, "myorg", "myrepo", request_payload["objects"][0]["oid"], size=9 + ) + + response = test_client.post("/myorg/myrepo/objects/batch", json=request_payload) assert 422 == response.status_code - assert 'application/vnd.git-lfs+json' == response.content_type + assert "application/vnd.git-lfs+json" == response.content_type payload = response.json - assert 'message' in payload - assert 'objects' not in payload - assert 'transfer' not in payload + assert "message" in payload + assert "objects" not in payload + assert "transfer" not in payload diff --git a/tests/test_error_responses.py b/tests/test_error_responses.py index bba0f0a..8947550 100644 --- a/tests/test_error_responses.py +++ b/tests/test_error_responses.py @@ -4,32 +4,32 @@ def test_error_response_422(test_client): - """Test an invalid payload error - """ - response = test_client.post('/myorg/myrepo/objects/batch', - json=batch_request_payload(delete_keys=['operation'])) + """Test an invalid payload error""" + response = test_client.post( + "/myorg/myrepo/objects/batch", + json=batch_request_payload(delete_keys=["operation"]), + ) assert 422 == response.status_code - assert 'application/vnd.git-lfs+json' == response.content_type - assert 'message' in response.json + assert "application/vnd.git-lfs+json" == response.content_type + assert "message" in response.json def test_error_response_404(test_client): - """Test a bad route error - """ - response = test_client.get('/now/for/something/completely/different') + """Test a bad route error""" + response = 
test_client.get("/now/for/something/completely/different") assert 404 == response.status_code - assert 'application/vnd.git-lfs+json' == response.content_type - assert 'message' in response.json + assert "application/vnd.git-lfs+json" == response.content_type + assert "message" in response.json def test_error_response_403(test_client): - """Test that we get Forbidden when trying to upload with the default read-only setup - """ - response = test_client.post('/myorg/myrepo/objects/batch', - json=batch_request_payload(operation='upload')) + """Test that we get Forbidden when trying to upload with the default read-only setup""" + response = test_client.post( + "/myorg/myrepo/objects/batch", json=batch_request_payload(operation="upload") + ) assert 403 == response.status_code - assert 'application/vnd.git-lfs+json' == response.content_type - assert 'message' in response.json + assert "application/vnd.git-lfs+json" == response.content_type + assert "message" in response.json diff --git a/tests/test_middleware.py b/tests/test_middleware.py index a1c1596..b607e56 100644 --- a/tests/test_middleware.py +++ b/tests/test_middleware.py @@ -9,54 +9,51 @@ @pytest.fixture() def app(storage_path): - """Session fixture to configure the Flask app - """ - app = init_app(additional_config={ - "TESTING": True, - "TRANSFER_ADAPTERS": { - "basic": { - "options": { - "storage_options": { - "path": storage_path - } + """Session fixture to configure the Flask app""" + app = init_app( + additional_config={ + "TESTING": True, + "TRANSFER_ADAPTERS": { + "basic": {"options": {"storage_options": {"path": storage_path}}} + }, + "MIDDLEWARE": [ + { + "class": "werkzeug.middleware.proxy_fix:ProxyFix", + "kwargs": { + "x_host": 1, + "x_port": 1, + "x_prefix": 1, + }, } - } - }, - "MIDDLEWARE": [ - { - "class": "werkzeug.middleware.proxy_fix:ProxyFix", - "kwargs": { - "x_host": 1, - "x_port": 1, - "x_prefix": 1, - } - } - ] - }) + ], + } + ) return app -@pytest.mark.usefixtures('authz_full_access') 
+@pytest.mark.usefixtures("authz_full_access") def test_upload_request_with_x_forwarded_middleware(test_client): - """Test the ProxyFix middleware generates correct URLs if X-Forwarded headers are set - """ - request_payload = batch_request_payload(operation='upload') - response = test_client.post('/myorg/myrepo/objects/batch', - json=request_payload) + """Test the ProxyFix middleware generates correct URLs if X-Forwarded headers are set""" + request_payload = batch_request_payload(operation="upload") + response = test_client.post("/myorg/myrepo/objects/batch", json=request_payload) assert 200 == response.status_code - href = response.json['objects'][0]['actions']['upload']['href'] - assert 'http://localhost/myorg/myrepo/objects/storage/12345678' == href - - response = test_client.post('/myorg/myrepo/objects/batch', - json=request_payload, - headers={ - 'X-Forwarded-Host': 'mycompany.xyz', - 'X-Forwarded-Port': '1234', - 'X-Forwarded-Prefix': '/lfs', - 'X-Forwarded-Proto': 'https' - }) + href = response.json["objects"][0]["actions"]["upload"]["href"] + assert "http://localhost/myorg/myrepo/objects/storage/12345678" == href + + response = test_client.post( + "/myorg/myrepo/objects/batch", + json=request_payload, + headers={ + "X-Forwarded-Host": "mycompany.xyz", + "X-Forwarded-Port": "1234", + "X-Forwarded-Prefix": "/lfs", + "X-Forwarded-Proto": "https", + }, + ) assert 200 == response.status_code - href = response.json['objects'][0]['actions']['upload']['href'] - assert 'https://mycompany.xyz:1234/lfs/myorg/myrepo/objects/storage/12345678' == href + href = response.json["objects"][0]["actions"]["upload"]["href"] + assert ( + "https://mycompany.xyz:1234/lfs/myorg/myrepo/objects/storage/12345678" == href + ) diff --git a/tests/test_schema.py b/tests/test_schema.py index 93891a9..ea07249 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -9,47 +9,61 @@ from .helpers import batch_request_payload -@pytest.mark.parametrize('input', [ - 
(batch_request_payload()), - (batch_request_payload(operation='upload')), - (batch_request_payload(delete_keys=['ref', 'transfers'])), -]) +@pytest.mark.parametrize( + "input", + [ + (batch_request_payload()), + (batch_request_payload(operation="upload")), + (batch_request_payload(delete_keys=["ref", "transfers"])), + ], +) def test_batch_request_schema_valid(input): parsed = schema.BatchRequest().load(input) assert parsed -@pytest.mark.parametrize('input', [ - ({}), - (batch_request_payload(operation='sneeze')), - (batch_request_payload(objects=[])), - (batch_request_payload(objects=[{"oid": 123456, "size": "large of course"}])), - (batch_request_payload(objects=[{"oid": "123abc", "size": -12}])), -]) +@pytest.mark.parametrize( + "input", + [ + ({}), + (batch_request_payload(operation="sneeze")), + (batch_request_payload(objects=[])), + (batch_request_payload(objects=[{"oid": 123456, "size": "large of course"}])), + (batch_request_payload(objects=[{"oid": "123abc", "size": -12}])), + ], +) def test_batch_request_schema_invalid(input): with pytest.raises(ValidationError): schema.BatchRequest().load(input) def test_batch_request_default_transfer(): - input = batch_request_payload(delete_keys=['transfers']) + input = batch_request_payload(delete_keys=["transfers"]) parsed = schema.BatchRequest().load(input) - assert ['basic'] == parsed['transfers'] + assert ["basic"] == parsed["transfers"] def test_object_schema_accepts_x_fields(): payload = { - "oid": "123abc", "size": 1212, "x-filename": "foobarbaz", - "x-mtime": 123123123123, "x-disposition": "inline" + "oid": "123abc", + "size": 1212, + "x-filename": "foobarbaz", + "x-mtime": 123123123123, + "x-disposition": "inline", } parsed = schema.ObjectSchema().load(payload) - assert "foobarbaz" == parsed['extra']['filename'] - assert 123123123123 == parsed['extra']['mtime'] - assert "123abc" == parsed['oid'] - assert "inline" == parsed['extra']['disposition'] + assert "foobarbaz" == parsed["extra"]["filename"] + assert 
123123123123 == parsed["extra"]["mtime"] + assert "123abc" == parsed["oid"] + assert "inline" == parsed["extra"]["disposition"] def test_object_schema_rejects_unknown_fields(): - payload = {"oid": "123abc", "size": 1212, "x-filename": "foobarbaz", "more": "stuff"} + payload = { + "oid": "123abc", + "size": 1212, + "x-filename": "foobarbaz", + "more": "stuff", + } with pytest.raises(ValidationError): schema.ObjectSchema().load(payload) diff --git a/tests/transfer/conftest.py b/tests/transfer/conftest.py index d5a409b..463c77b 100644 --- a/tests/transfer/conftest.py +++ b/tests/transfer/conftest.py @@ -7,8 +7,7 @@ @pytest.fixture() def reset_registered_transfers(): - """Reset global registered transfer adapters for each module - """ + """Reset global registered transfer adapters for each module""" adapters = dict(transfer._registered_adapters) # noqa try: yield diff --git a/tests/transfer/test_basic_external_adapter.py b/tests/transfer/test_basic_external_adapter.py index 0a11413..9b8fe9e 100644 --- a/tests/transfer/test_basic_external_adapter.py +++ b/tests/transfer/test_basic_external_adapter.py @@ -8,164 +8,173 @@ def test_factory_returns_object(): - """Test that the basic_external factory returns the right object(s) - """ + """Test that the basic_external factory returns the right object(s)""" base_url = "https://s4.example.com/" lifetime = 300 - adapter = basic_external.factory('{}:MockExternalStorageBackend'.format(__name__, ), - {"base_url": base_url}, - lifetime) + adapter = basic_external.factory( + "{}:MockExternalStorageBackend".format( + __name__, + ), + {"base_url": base_url}, + lifetime, + ) assert isinstance(adapter, basic_external.BasicExternalBackendTransferAdapter) - assert getattr(adapter.storage, 'base_url', None) == base_url + assert getattr(adapter.storage, "base_url", None) == base_url assert adapter.action_lifetime == lifetime -@pytest.mark.usefixtures('app_context') +@pytest.mark.usefixtures("app_context") def test_upload_action_new_file(): - 
adapter = basic_external.factory('{}:MockExternalStorageBackend'.format(__name__, ), {}, 900) - response = adapter.upload('myorg', 'myrepo', 'abcdef123456', 1234) + adapter = basic_external.factory( + "{}:MockExternalStorageBackend".format( + __name__, + ), + {}, + 900, + ) + response = adapter.upload("myorg", "myrepo", "abcdef123456", 1234) assert response == { - "oid": 'abcdef123456', + "oid": "abcdef123456", "size": 1234, "authenticated": True, "actions": { "upload": { - "href": 'https://cloudstorage.example.com/myorg/myrepo/abcdef123456?expires_in=900', + "href": "https://cloudstorage.example.com/myorg/myrepo/abcdef123456?expires_in=900", "header": {"x-foo-bar": "bazbaz"}, - "expires_in": 900 + "expires_in": 900, }, "verify": { - "href": 'http://giftless.local/myorg/myrepo/objects/storage/verify', + "href": "http://giftless.local/myorg/myrepo/objects/storage/verify", "header": {}, - "expires_in": 43200 - } - } + "expires_in": 43200, + }, + }, } -@pytest.mark.usefixtures('app_context') +@pytest.mark.usefixtures("app_context") def test_upload_action_extras_are_passed(): - adapter = basic_external.factory('{}:MockExternalStorageBackend'.format(__name__), {}, 900) - response = adapter.upload('myorg', 'myrepo', 'abcdef123456', 1234, {"filename": "foo.csv"}) + adapter = basic_external.factory( + "{}:MockExternalStorageBackend".format(__name__), {}, 900 + ) + response = adapter.upload( + "myorg", "myrepo", "abcdef123456", 1234, {"filename": "foo.csv"} + ) assert response == { - "oid": 'abcdef123456', + "oid": "abcdef123456", "size": 1234, "authenticated": True, "actions": { "upload": { - "href": 'https://cloudstorage.example.com/myorg/myrepo/abcdef123456?expires_in=900&filename=foo.csv', + "href": "https://cloudstorage.example.com/myorg/myrepo/abcdef123456?expires_in=900&filename=foo.csv", "header": {"x-foo-bar": "bazbaz"}, - "expires_in": 900 + "expires_in": 900, }, "verify": { - "href": 'http://giftless.local/myorg/myrepo/objects/storage/verify', + "href": 
"http://giftless.local/myorg/myrepo/objects/storage/verify", "header": {}, - "expires_in": 43200 - } - } + "expires_in": 43200, + }, + }, } -@pytest.mark.usefixtures('app_context') +@pytest.mark.usefixtures("app_context") def test_upload_action_existing_file(): storage = MockExternalStorageBackend() adapter = basic_external.BasicExternalBackendTransferAdapter(storage, 900) # Add an "existing object" - storage.existing_objects[('myorg/myrepo', 'abcdef123456')] = 1234 + storage.existing_objects[("myorg/myrepo", "abcdef123456")] = 1234 - response = adapter.upload('myorg', 'myrepo', 'abcdef123456', 1234) + response = adapter.upload("myorg", "myrepo", "abcdef123456", 1234) # We expect a response with no actions assert response == { - "oid": 'abcdef123456', + "oid": "abcdef123456", "size": 1234, } -@pytest.mark.usefixtures('app_context') +@pytest.mark.usefixtures("app_context") def test_download_action_existing_file(): storage = MockExternalStorageBackend() adapter = basic_external.BasicExternalBackendTransferAdapter(storage, 900) # Add an "existing object" - storage.existing_objects[('myorg/myrepo', 'abcdef123456')] = 1234 - response = adapter.download('myorg', 'myrepo', 'abcdef123456', 1234) + storage.existing_objects[("myorg/myrepo", "abcdef123456")] = 1234 + response = adapter.download("myorg", "myrepo", "abcdef123456", 1234) assert response == { - "oid": 'abcdef123456', + "oid": "abcdef123456", "size": 1234, "authenticated": True, "actions": { "download": { - "href": 'https://cloudstorage.example.com/myorg/myrepo/abcdef123456?expires_in=900', + "href": "https://cloudstorage.example.com/myorg/myrepo/abcdef123456?expires_in=900", "header": {}, - "expires_in": 900 + "expires_in": 900, } - } + }, } -@pytest.mark.usefixtures('app_context') +@pytest.mark.usefixtures("app_context") def test_download_action_non_existing_file(): storage = MockExternalStorageBackend() adapter = basic_external.BasicExternalBackendTransferAdapter(storage, 900) # Add an "existing object" - 
storage.existing_objects[('myorg/myrepo', '123456abcdef')] = 1234 - response = adapter.download('myorg', 'myrepo', 'abcdef123456', 1234) + storage.existing_objects[("myorg/myrepo", "123456abcdef")] = 1234 + response = adapter.download("myorg", "myrepo", "abcdef123456", 1234) assert response == { - "oid": 'abcdef123456', + "oid": "abcdef123456", "size": 1234, - "error": { - "code": 404, - "message": "Object does not exist" - } + "error": {"code": 404, "message": "Object does not exist"}, } -@pytest.mark.usefixtures('app_context') +@pytest.mark.usefixtures("app_context") def test_download_action_size_mismatch(): storage = MockExternalStorageBackend() adapter = basic_external.BasicExternalBackendTransferAdapter(storage, 900) # Add an "existing object" - storage.existing_objects[('myorg/myrepo', 'abcdef123456')] = 1234 - response = adapter.download('myorg', 'myrepo', 'abcdef123456', 12345) + storage.existing_objects[("myorg/myrepo", "abcdef123456")] = 1234 + response = adapter.download("myorg", "myrepo", "abcdef123456", 12345) assert response == { - "oid": 'abcdef123456', + "oid": "abcdef123456", "size": 12345, - "error": { - "code": 422, - "message": "Object size does not match" - } + "error": {"code": 422, "message": "Object size does not match"}, } -@pytest.mark.usefixtures('app_context') +@pytest.mark.usefixtures("app_context") def test_download_action_extras_are_passed(): storage = MockExternalStorageBackend() adapter = basic_external.BasicExternalBackendTransferAdapter(storage, 900) # Add an "existing object" - storage.existing_objects[('myorg/myrepo', 'abcdef123456')] = 1234 - response = adapter.download('myorg', 'myrepo', 'abcdef123456', 1234, {"filename": "foo.csv"}) + storage.existing_objects[("myorg/myrepo", "abcdef123456")] = 1234 + response = adapter.download( + "myorg", "myrepo", "abcdef123456", 1234, {"filename": "foo.csv"} + ) assert response == { - "oid": 'abcdef123456', + "oid": "abcdef123456", "size": 1234, "authenticated": True, "actions": { 
"download": { - "href": 'https://cloudstorage.example.com/myorg/myrepo/abcdef123456?expires_in=900&filename=foo.csv', + "href": "https://cloudstorage.example.com/myorg/myrepo/abcdef123456?expires_in=900&filename=foo.csv", "header": {}, - "expires_in": 900 + "expires_in": 900, } - } + }, } @@ -176,7 +185,8 @@ class MockExternalStorageBackend(basic_external.ExternalStorage): be used in testing to test the transfer adapter's behavior without accessing an actual cloud provider. """ - def __init__(self, base_url: str = 'https://cloudstorage.example.com/'): + + def __init__(self, base_url: str = "https://cloudstorage.example.com/"): self.existing_objects: Dict[Tuple[str, str], int] = {} self.base_url = base_url @@ -189,32 +199,50 @@ def get_size(self, prefix: str, oid: str) -> int: except KeyError: raise ObjectNotFound("Object does not exist") - def get_upload_action(self, prefix: str, oid: str, size: int, expires_in: int, - extra: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + def get_upload_action( + self, + prefix: str, + oid: str, + size: int, + expires_in: int, + extra: Optional[Dict[str, Any]] = None, + ) -> Dict[str, Any]: return { "actions": { "upload": { "href": self._get_signed_url(prefix, oid, expires_in, extra), "header": {"x-foo-bar": "bazbaz"}, - "expires_in": expires_in + "expires_in": expires_in, } } } - def get_download_action(self, prefix: str, oid: str, size: int, expires_in: int, - extra: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + def get_download_action( + self, + prefix: str, + oid: str, + size: int, + expires_in: int, + extra: Optional[Dict[str, Any]] = None, + ) -> Dict[str, Any]: return { "actions": { "download": { "href": self._get_signed_url(prefix, oid, expires_in, extra), "header": {}, - "expires_in": 900 + "expires_in": 900, } } } - def _get_signed_url(self, prefix: str, oid: str, expires_in: int, extra: Optional[Dict[str, Any]] = None): - url = '{}{}/{}?expires_in={}'.format(self.base_url, prefix, oid, expires_in) + def 
_get_signed_url( + self, + prefix: str, + oid: str, + expires_in: int, + extra: Optional[Dict[str, Any]] = None, + ): + url = "{}{}/{}?expires_in={}".format(self.base_url, prefix, oid, expires_in) if extra: - url = f'{url}&{urlencode(extra, doseq=False)}' + url = f"{url}&{urlencode(extra, doseq=False)}" return url diff --git a/tests/transfer/test_module.py b/tests/transfer/test_module.py index c820a48..f04d3c3 100644 --- a/tests/transfer/test_module.py +++ b/tests/transfer/test_module.py @@ -5,13 +5,16 @@ from giftless import transfer -@pytest.mark.parametrize('register,requested,expected', [ - (['basic'], ['basic'], 'basic'), - (['foobar', 'basic', 'bizbaz'], ['basic'], 'basic'), - (['foobar', 'basic', 'bizbaz'], ['foobar'], 'foobar'), - (['foobar', 'basic', 'bizbaz'], ['bizbaz', 'basic'], 'bizbaz'), -]) -@pytest.mark.usefixtures('reset_registered_transfers') +@pytest.mark.parametrize( + "register,requested,expected", + [ + (["basic"], ["basic"], "basic"), + (["foobar", "basic", "bizbaz"], ["basic"], "basic"), + (["foobar", "basic", "bizbaz"], ["foobar"], "foobar"), + (["foobar", "basic", "bizbaz"], ["bizbaz", "basic"], "bizbaz"), + ], +) +@pytest.mark.usefixtures("reset_registered_transfers") def test_transfer_adapter_matching(register, requested, expected): for adapter in register: transfer.register_adapter(adapter, transfer.TransferAdapter()) @@ -21,7 +24,7 @@ def test_transfer_adapter_matching(register, requested, expected): def test_transfer_adapter_matching_nomatch(): - for adapter in ['foobar', 'basic', 'bizbaz']: + for adapter in ["foobar", "basic", "bizbaz"]: transfer.register_adapter(adapter, transfer.TransferAdapter()) with pytest.raises(ValueError): - transfer.match_transfer_adapter(['complex', 'even-better']) + transfer.match_transfer_adapter(["complex", "even-better"]) diff --git a/tox.ini b/tox.ini index 222deed..8f2bbd3 100644 --- a/tox.ini +++ b/tox.ini @@ -4,11 +4,47 @@ # and then run "tox" from this directory. 
[tox] -envlist = py37, py38, py39, py310 +envlist = py,typing,lint,docs +isolated_build=true [testenv] deps = -rrequirements.txt -rdev-requirements.txt + +[testenv:coverage-report] +description = Compile coverage from each test run. +skip_install = true +deps = coverage[toml]>=5.0.2 +depends = + py-coverage +commands = coverage report + +[testenv:lint] +description = Lint codebase by running pre-commit (Black, isort, Flake8) +skip_install = true +deps = + pre-commit +commands = pre-commit run --all-files + +[testenv:py] +description = Run pytest +commands = + pytest -vv {posargs} + +[testenv:py-coverage] +description = Run pytest with Docker prerequisites and coverage analysis +commands = + pytest -vv --cov=giftless --cov-branch --cov-report= {posargs} + +[testenv:docs] +description = Build documentation (HTML) with Sphinx +allowlist_externals = + make +commands = + make docs + +[testenv:typing] +description = Run mypy commands = - pytest -v + mypy giftless tests From 303e5b858bf55cbbd496d85c0427a3f2f9b860a1 Mon Sep 17 00:00:00 2001 From: adam Date: Tue, 2 Jan 2024 11:27:46 -0700 Subject: [PATCH 2/4] Add .pre-commit-config.yaml --- .pre-commit-config.yaml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..353a41a --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,24 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: check-merge-conflict + - id: check-toml + # FIXME: VCR is unhappy; address in test rewrite + # - id: check-yaml + # args: [--allow-multiple-documents] + - id: trailing-whitespace + + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.1.8 + hooks: + - id: ruff + args: [--fix, --exit-non-zero-on-fix] + - id: ruff-format + + - repo: https://github.com/adamchainz/blacken-docs + rev: 1.16.0 + hooks: + - id: blacken-docs + 
additional_dependencies: [black==23.10.1] + args: [-l, '79', -t, py311] From ff54ff3f5ffee522322615c3b80205846c6af175 Mon Sep 17 00:00:00 2001 From: adam Date: Tue, 2 Jan 2024 13:07:49 -0700 Subject: [PATCH 3/4] Clean up tests with black, too --- .pre-commit-config.yaml | 14 +- LICENSE | 2 +- changelog.d/_template.md.jinja | 7 + giftless/app.py | 3 +- giftless/auth/__init__.py | 22 +- giftless/auth/identity.py | 17 +- giftless/auth/jwt.py | 54 ++-- giftless/config.py | 16 +- giftless/storage/__init__.py | 20 +- giftless/storage/amazon_s3.py | 38 ++- giftless/storage/azure.py | 88 ++++-- giftless/storage/google_cloud.py | 23 +- giftless/storage/local_storage.py | 10 +- giftless/transfer/__init__.py | 41 +-- giftless/transfer/basic_external.py | 14 +- giftless/transfer/basic_streaming.py | 78 +++-- giftless/transfer/multipart.py | 14 +- giftless/transfer/types.py | 16 +- giftless/util.py | 13 +- giftless/view.py | 20 +- pyproject.toml | 270 ++++++++++++++++++ scripts/docker-tag.sh | 14 + tests/auth/test_auth.py | 63 +++- tests/auth/test_jwt.py | 127 ++++++-- tests/conftest.py | 14 +- tests/storage/__init__.py | 16 +- tests/storage/test_amazon_s3.py | 21 +- tests/storage/test_azure.py | 13 +- tests/storage/test_google_cloud.py | 7 +- tests/storage/test_local.py | 6 +- tests/test_batch_api.py | 92 ++++-- tests/test_error_responses.py | 15 +- tests/test_middleware.py | 19 +- tests/test_schema.py | 14 +- tests/transfer/conftest.py | 6 +- tests/transfer/test_basic_external_adapter.py | 38 +-- 36 files changed, 926 insertions(+), 319 deletions(-) create mode 100644 changelog.d/_template.md.jinja create mode 100644 pyproject.toml create mode 100755 scripts/docker-tag.sh diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 353a41a..7dccfc9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -9,12 +9,14 @@ repos: # args: [--allow-multiple-documents] - id: trailing-whitespace - - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.8 
- hooks: - - id: ruff - args: [--fix, --exit-non-zero-on-fix] - - id: ruff-format + # FIXME: introduce after initial cleanup; it's going to take a lot + # of work. + # - repo: https://github.com/astral-sh/ruff-pre-commit + # rev: v0.1.8 + # hooks: + # - id: ruff + # args: [--fix, --exit-non-zero-on-fix] + # - id: ruff-format - repo: https://github.com/adamchainz/blacken-docs rev: 1.16.0 diff --git a/LICENSE b/LICENSE index 65b52fd..e08dc69 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright 2020 Datopian (Viderum, Inc.) +Copyright 2020-2024 Datopian (Viderum, Inc.) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/changelog.d/_template.md.jinja b/changelog.d/_template.md.jinja new file mode 100644 index 0000000..6e644b8 --- /dev/null +++ b/changelog.d/_template.md.jinja @@ -0,0 +1,7 @@ + +{%- for cat in config.categories %} + +### {{ cat }} + +- +{%- endfor %} diff --git a/giftless/app.py b/giftless/app.py index 9d768fd..770d2ad 100644 --- a/giftless/app.py +++ b/giftless/app.py @@ -25,7 +25,8 @@ def init_app(app=None, additional_config=None): else: level = logging.WARNING logging.basicConfig( - format="%(asctime)-15s %(name)-15s %(levelname)s %(message)s", level=level + format="%(asctime)-15s %(name)-15s %(levelname)s %(message)s", + level=level, ) # Load middleware diff --git a/giftless/auth/__init__.py b/giftless/auth/__init__.py index 7583b87..f808c05 100644 --- a/giftless/auth/__init__.py +++ b/giftless/auth/__init__.py @@ -2,8 +2,9 @@ """ import abc import logging +from collections.abc import Callable from functools import wraps -from typing import Any, Callable, Dict, List, Optional, Set, Union +from typing import Any, Optional, Union from flask import Request, current_app, g from flask import request as flask_request @@ -45,10 +46,10 @@ def get_authz_query_params( identity: Identity, org: str, repo: str, - actions: 
Optional[Set[str]] = None, + actions: Optional[set[str]] = None, oid: Optional[str] = None, lifetime: Optional[int] = None, - ) -> Dict[str, str]: + ) -> dict[str, str]: """Authorize an action by adding credientaisl to the query string""" return {} @@ -57,18 +58,20 @@ def get_authz_header( identity: Identity, org: str, repo: str, - actions: Optional[Set[str]] = None, + actions: Optional[set[str]] = None, oid: Optional[str] = None, lifetime: Optional[int] = None, - ) -> Dict[str, str]: + ) -> dict[str, str]: """Authorize an action by adding credentials to the request headers""" return {} class Authentication: - def __init__(self, app=None, default_identity: Optional[Identity] = None) -> None: + def __init__( + self, app=None, default_identity: Optional[Identity] = None + ) -> None: self._default_identity = default_identity - self._authenticators: List[Authenticator] = [] + self._authenticators: list[Authenticator] = [] self._unauthorized_handler: Optional[Callable] = None self.preauth_handler: Optional[PreAuthorizedActionAuthenticator] = None @@ -141,7 +144,8 @@ def init_authenticators(self, reload=False): ) self._authenticators = [ - _create_authenticator(a) for a in current_app.config["AUTH_PROVIDERS"] + _create_authenticator(a) + for a in current_app.config["AUTH_PROVIDERS"] ] if current_app.config["PRE_AUTHORIZED_ACTION_PROVIDER"]: @@ -173,7 +177,7 @@ def _authenticate(self) -> Optional[Identity]: return self._default_identity -def _create_authenticator(spec: Union[str, Dict[str, Any]]) -> Authenticator: +def _create_authenticator(spec: Union[str, dict[str, Any]]) -> Authenticator: """Instantiate an authenticator from configuration spec Configuration spec can be a string referencing a callable (e.g. 
mypackage.mymodule:callable) diff --git a/giftless/auth/identity.py b/giftless/auth/identity.py index c16363e..da9eccb 100644 --- a/giftless/auth/identity.py +++ b/giftless/auth/identity.py @@ -1,7 +1,7 @@ from abc import ABC, abstractmethod from collections import defaultdict from enum import Enum -from typing import Dict, Optional, Set +from typing import Optional class Permission(Enum): @@ -12,12 +12,12 @@ class Permission(Enum): WRITE = "write" @classmethod - def all(cls) -> Set["Permission"]: + def all(cls) -> set["Permission"]: return set(cls) -PermissionTree = Dict[ - Optional[str], Dict[Optional[str], Dict[Optional[str], Set[Permission]]] +PermissionTree = dict[ + Optional[str], dict[Optional[str], dict[Optional[str], set[Permission]]] ] @@ -41,10 +41,9 @@ def is_authorized( oid: Optional[str] = None, ) -> bool: """Tell if user is authorized to perform an operation on an object / repo""" - pass def __repr__(self): - return "<{} id:{} name:{}>".format(self.__class__.__name__, self.id, self.name) + return f"<{self.__class__.__name__} id:{self.id} name:{self.name}>" class DefaultIdentity(Identity): @@ -65,7 +64,7 @@ def allow( self, organization: Optional[str] = None, repo: Optional[str] = None, - permissions: Optional[Set[Permission]] = None, + permissions: Optional[set[Permission]] = None, oid: Optional[str] = None, ): if permissions is None: @@ -85,7 +84,9 @@ def is_authorized( if oid in self._allowed[organization][repo]: return permission in self._allowed[organization][repo][oid] elif None in self._allowed[organization][repo]: - return permission in self._allowed[organization][repo][None] + return ( + permission in self._allowed[organization][repo][None] + ) elif None in self._allowed[organization]: return permission in self._allowed[organization][None][None] elif None in self._allowed and None in self._allowed[None]: diff --git a/giftless/auth/jwt.py b/giftless/auth/jwt.py index e022b23..a1a6773 100644 --- a/giftless/auth/jwt.py +++ b/giftless/auth/jwt.py 
@@ -1,6 +1,6 @@ import logging from datetime import datetime, timedelta -from typing import Any, Dict, Optional, Set, Union +from typing import Any, Optional, Union import jwt from dateutil.tz import UTC @@ -132,11 +132,11 @@ def __call__(self, request: Request) -> Optional[Identity]: return None return self._get_identity(token_payload) - def get_authz_header(self, *args, **kwargs) -> Dict[str, str]: + def get_authz_header(self, *args, **kwargs) -> dict[str, str]: token = self._generate_token_for_action(*args, **kwargs) return {"Authorization": f"Bearer {token}"} - def get_authz_query_params(self, *args, **kwargs) -> Dict[str, str]: + def get_authz_query_params(self, *args, **kwargs) -> dict[str, str]: return {"jwt": self._generate_token_for_action(*args, **kwargs)} def _generate_token_for_action( @@ -144,12 +144,12 @@ def _generate_token_for_action( identity: Identity, org: str, repo: str, - actions: Optional[Set[str]] = None, + actions: Optional[set[str]] = None, oid: Optional[str] = None, lifetime: Optional[int] = None, ) -> str: """Generate a JWT token authorizing the specific requested action""" - token_payload: Dict[str, Any] = {"sub": identity.id} + token_payload: dict[str, Any] = {"sub": identity.id} if self.issuer: token_payload["iss"] = self.issuer if self.audience: @@ -160,11 +160,15 @@ def _generate_token_for_action( token_payload["name"] = identity.name # Scopes - token_payload["scopes"] = self._generate_action_scopes(org, repo, actions, oid) + token_payload["scopes"] = self._generate_action_scopes( + org, repo, actions, oid + ) # Custom lifetime if lifetime: - token_payload["exp"] = datetime.now(tz=UTC) + timedelta(seconds=lifetime) + token_payload["exp"] = datetime.now(tz=UTC) + timedelta( + seconds=lifetime + ) return self._generate_token(**token_payload) @@ -172,7 +176,7 @@ def _generate_token_for_action( def _generate_action_scopes( org: str, repo: str, - actions: Optional[Set[str]] = None, + actions: Optional[set[str]] = None, oid: Optional[str] 
= None, ) -> str: """Generate token scopes based on target object and actions""" @@ -188,8 +192,9 @@ def _generate_token(self, **kwargs) -> str: "This authenticator is not configured to generate tokens; Set private_key to fix" ) - payload: Dict[str, Any] = { - "exp": datetime.now(tz=UTC) + timedelta(seconds=self.default_lifetime), + payload: dict[str, Any] = { + "exp": datetime.now(tz=UTC) + + timedelta(seconds=self.default_lifetime), "iat": datetime.now(tz=UTC), "nbf": datetime.now(tz=UTC), } @@ -207,7 +212,10 @@ def _generate_token(self, **kwargs) -> str: headers["kid"] = self.key_id token = jwt.encode( - payload, self.private_key, algorithm=self.algorithm, headers=headers + payload, + self.private_key, + algorithm=self.algorithm, + headers=headers, ) # Type of jwt.encode() went from bytes to str in jwt 2.x, but the # typing hints somehow aren't keeping up. This lets us do the @@ -242,7 +250,7 @@ def _authenticate(self, request: Request): ) except jwt.PyJWTError as e: raise Unauthorized( - "Expired or otherwise invalid JWT token ({})".format(str(e)) + f"Expired or otherwise invalid JWT token ({e!s})" ) def _get_token_from_headers(self, request: Request) -> Optional[str]: @@ -266,7 +274,10 @@ def _get_token_from_headers(self, request: Request) -> Optional[str]: return payload elif authz_type.lower() == "basic" and self.basic_auth_user: parsed_header = Authorization.from_header(header) - if parsed_header and parsed_header.username == self.basic_auth_user: + if ( + parsed_header + and parsed_header.username == self.basic_auth_user + ): self._log.debug("Found token in Authorization: Basic header") if parsed_header.password is None: return None @@ -279,7 +290,7 @@ def _get_token_from_qs(request: Request) -> Optional[str]: """Get JWT token from the query string""" return request.args.get("jwt") - def _get_identity(self, jwt_payload: Dict[str, Any]) -> Identity: + def _get_identity(self, jwt_payload: dict[str, Any]) -> Identity: identity = DefaultIdentity( 
id=jwt_payload.get("sub"), email=jwt_payload.get("email"), @@ -293,7 +304,7 @@ def _get_identity(self, jwt_payload: Dict[str, Any]) -> Identity: return identity - def _parse_scope(self, scope_str: str) -> Dict[str, Any]: + def _parse_scope(self, scope_str: str) -> dict[str, Any]: """Parse a scope string and convert it into arguments for Identity.allow()""" scope = Scope.from_string(scope_str) if scope.entity_type != "obj": @@ -305,7 +316,8 @@ def _parse_scope(self, scope_str: str) -> Dict[str, Any]: if scope.entity_ref is not None: id_parts = [ - p if p != "*" else None for p in scope.entity_ref.split("/", maxsplit=2) + p if p != "*" else None + for p in scope.entity_ref.split("/", maxsplit=2) ] if len(id_parts) == 3: organization, repo, oid = id_parts @@ -324,7 +336,7 @@ def _parse_scope(self, scope_str: str) -> Dict[str, Any]: } @staticmethod - def _parse_scope_permissions(scope: "Scope") -> Set[Permission]: + def _parse_scope_permissions(scope: "Scope") -> set[Permission]: """Extract granted permissions from scope object""" permissions_map = { "read": {Permission.READ, Permission.READ_META}, @@ -360,7 +372,7 @@ def _get_verification_key(self) -> Union[str, bytes]: return self._verification_key -class Scope(object): +class Scope: """Scope object""" entity_type = None @@ -372,7 +384,7 @@ def __init__( self, entity_type: str, entity_id: Optional[str] = None, - actions: Optional[Set[str]] = None, + actions: Optional[set[str]] = None, subscope: Optional[str] = None, ): self.entity_type = entity_type @@ -381,7 +393,7 @@ def __init__( self.subscope = subscope def __repr__(self): - return "".format(str(self)) + return f"" def __str__(self): """Convert scope to a string""" @@ -429,7 +441,7 @@ def from_string(cls, scope_str): return scope @classmethod - def _parse_actions(cls, actions_str: str) -> Set[str]: + def _parse_actions(cls, actions_str: str) -> set[str]: if not actions_str: return set() return set(actions_str.split(",")) diff --git a/giftless/config.py 
b/giftless/config.py index 29ae682..0ea7d77 100644 --- a/giftless/config.py +++ b/giftless/config.py @@ -1,7 +1,7 @@ """Configuration handling helper functions and default configuration """ import os -from typing import Dict, Optional +from typing import Optional import figcan import yaml @@ -17,7 +17,9 @@ "options": figcan.Extensible( { "storage_class": "giftless.storage.local_storage:LocalStorage", - "storage_options": figcan.Extensible({"path": "lfs-storage"}), + "storage_options": figcan.Extensible( + {"path": "lfs-storage"} + ), "action_lifetime": 900, } ), @@ -48,17 +50,21 @@ load_dotenv() -def configure(app, additional_config: Optional[Dict] = None): +def configure(app, additional_config: Optional[dict] = None): """Configure a Flask app using Figcan managed configuration object""" config = _compose_config(additional_config) app.config.update(config) return app -def _compose_config(additional_config: Optional[Dict] = None) -> figcan.Configuration: +def _compose_config( + additional_config: Optional[dict] = None, +) -> figcan.Configuration: """Compose configuration object from all available sources""" config = figcan.Configuration(default_config) - environ = dict(os.environ) # Copy the environment as we're going to change it + environ = dict( + os.environ + ) # Copy the environment as we're going to change it if environ.get(f"{ENV_PREFIX}CONFIG_FILE"): with open(environ[f"{ENV_PREFIX}CONFIG_FILE"]) as f: diff --git a/giftless/storage/__init__.py b/giftless/storage/__init__.py index d51a45d..237664a 100644 --- a/giftless/storage/__init__.py +++ b/giftless/storage/__init__.py @@ -1,6 +1,7 @@ import mimetypes from abc import ABC, abstractmethod -from typing import Any, BinaryIO, Dict, Iterable, Optional +from collections.abc import Iterable +from typing import Any, BinaryIO, Optional from . 
import exc @@ -17,7 +18,6 @@ def verify_object(self, prefix: str, oid: str, size: int) -> bool: This method should not throw an error if the object does not exist, but return False """ - pass class StreamingStorage(VerifiableStorage, ABC): @@ -60,8 +60,8 @@ def get_upload_action( oid: str, size: int, expires_in: int, - extra: Optional[Dict[str, Any]] = None, - ) -> Dict[str, Any]: + extra: Optional[dict[str, Any]] = None, + ) -> dict[str, Any]: pass @abstractmethod @@ -71,8 +71,8 @@ def get_download_action( oid: str, size: int, expires_in: int, - extra: Optional[Dict[str, Any]] = None, - ) -> Dict[str, Any]: + extra: Optional[dict[str, Any]] = None, + ) -> dict[str, Any]: pass @abstractmethod @@ -99,8 +99,8 @@ def get_multipart_actions( size: int, part_size: int, expires_in: int, - extra: Optional[Dict[str, Any]] = None, - ) -> Dict[str, Any]: + extra: Optional[dict[str, Any]] = None, + ) -> dict[str, Any]: pass @abstractmethod @@ -110,8 +110,8 @@ def get_download_action( oid: str, size: int, expires_in: int, - extra: Optional[Dict[str, Any]] = None, - ) -> Dict[str, Any]: + extra: Optional[dict[str, Any]] = None, + ) -> dict[str, Any]: pass @abstractmethod diff --git a/giftless/storage/amazon_s3.py b/giftless/storage/amazon_s3.py index 687cd77..0c1999e 100644 --- a/giftless/storage/amazon_s3.py +++ b/giftless/storage/amazon_s3.py @@ -1,7 +1,8 @@ import base64 import binascii import posixpath -from typing import Any, BinaryIO, Dict, Iterable, Optional +from collections.abc import Iterable +from typing import Any, BinaryIO, Optional import boto3 import botocore @@ -40,7 +41,9 @@ def upload_callback(size): bucket = self.s3.Bucket(self.bucket_name) bucket.upload_fileobj( - data_stream, self._get_blob_path(prefix, oid), Callback=upload_callback + data_stream, + self._get_blob_path(prefix, oid), + Callback=upload_callback, ) return sum(completed) @@ -67,8 +70,8 @@ def get_upload_action( oid: str, size: int, expires_in: int, - extra: Optional[Dict[str, Any]] = None, - ) 
-> Dict[str, Any]: + extra: Optional[dict[str, Any]] = None, + ) -> dict[str, Any]: base64_oid = base64.b64encode(binascii.a2b_hex(oid)).decode("ascii") params = { "Bucket": self.bucket_name, @@ -98,16 +101,23 @@ def get_download_action( oid: str, size: int, expires_in: int, - extra: Optional[Dict[str, str]] = None, - ) -> Dict[str, Any]: - params = {"Bucket": self.bucket_name, "Key": self._get_blob_path(prefix, oid)} + extra: Optional[dict[str, str]] = None, + ) -> dict[str, Any]: + params = { + "Bucket": self.bucket_name, + "Key": self._get_blob_path(prefix, oid), + } filename = extra.get("filename") if extra else None - disposition = extra.get("disposition", "attachment") if extra else "attachment" + disposition = ( + extra.get("disposition", "attachment") if extra else "attachment" + ) if filename and disposition: filename = safe_filename(filename) - params["ResponseContentDisposition"] = f'attachment; filename="{filename}"' + params[ + "ResponseContentDisposition" + ] = f'attachment; filename="{filename}"' elif disposition: params["ResponseContentDisposition"] = disposition @@ -116,7 +126,11 @@ def get_download_action( ) return { "actions": { - "download": {"href": response, "header": {}, "expires_in": expires_in} + "download": { + "href": response, + "header": {}, + "expires_in": expires_in, + } } } @@ -131,4 +145,6 @@ def _get_blob_path(self, prefix: str, oid: str) -> str: return posixpath.join(storage_prefix, prefix, oid) def _s3_object(self, prefix, oid): - return self.s3.Object(self.bucket_name, self._get_blob_path(prefix, oid)) + return self.s3.Object( + self.bucket_name, self._get_blob_path(prefix, oid) + ) diff --git a/giftless/storage/azure.py b/giftless/storage/azure.py index 3cbda07..57a2025 100644 --- a/giftless/storage/azure.py +++ b/giftless/storage/azure.py @@ -2,8 +2,9 @@ import logging import posixpath from collections import namedtuple -from datetime import datetime, timedelta, timezone -from typing import IO, Any, Dict, Iterable, List, 
Optional +from collections.abc import Iterable +from datetime import UTC, datetime, timedelta +from typing import IO, Any, Optional from urllib.parse import urlencode from xml.sax.saxutils import escape as xml_escape @@ -13,8 +14,9 @@ BlobSasPermissions, BlobServiceClient, generate_blob_sas, -) # type: ignore +) +# type: ignore from giftless.storage import ( ExternalStorage, MultipartStorage, @@ -53,7 +55,8 @@ def __init__( def get(self, prefix: str, oid: str) -> Iterable[bytes]: blob_client = self.blob_svc_client.get_blob_client( - container=self.container_name, blob=self._get_blob_path(prefix, oid) + container=self.container_name, + blob=self._get_blob_path(prefix, oid), ) try: return blob_client.download_blob().chunks() # type: ignore @@ -62,7 +65,8 @@ def get(self, prefix: str, oid: str) -> Iterable[bytes]: def put(self, prefix: str, oid: str, data_stream: IO[bytes]) -> int: blob_client = self.blob_svc_client.get_blob_client( - container=self.container_name, blob=self._get_blob_path(prefix, oid) + container=self.container_name, + blob=self._get_blob_path(prefix, oid), ) blob_client.upload_blob(data_stream) # type: ignore return data_stream.tell() @@ -77,7 +81,8 @@ def exists(self, prefix: str, oid: str) -> bool: def get_size(self, prefix: str, oid: str) -> int: try: blob_client = self.blob_svc_client.get_blob_client( - container=self.container_name, blob=self._get_blob_path(prefix, oid) + container=self.container_name, + blob=self._get_blob_path(prefix, oid), ) props = blob_client.get_blob_properties() return props.size # type: ignore @@ -87,7 +92,8 @@ def get_size(self, prefix: str, oid: str) -> int: def get_mime_type(self, prefix: str, oid: str) -> Optional[str]: try: blob_client = self.blob_svc_client.get_blob_client( - container=self.container_name, blob=self._get_blob_path(prefix, oid) + container=self.container_name, + blob=self._get_blob_path(prefix, oid), ) props = blob_client.get_blob_properties() mime_type = props.content_settings.get( @@ -103,8 
+109,8 @@ def get_upload_action( oid: str, size: int, expires_in: int, - extra: Optional[Dict[str, Any]] = None, - ) -> Dict[str, Any]: + extra: Optional[dict[str, Any]] = None, + ) -> dict[str, Any]: filename = extra.get("filename") if extra else None headers = { "x-ms-blob-type": "BlockBlob", @@ -135,10 +141,12 @@ def get_download_action( oid: str, size: int, expires_in: int, - extra: Optional[Dict[str, Any]] = None, - ) -> Dict[str, Any]: + extra: Optional[dict[str, Any]] = None, + ) -> dict[str, Any]: filename = extra.get("filename") if extra else None - disposition = extra.get("disposition", "attachment") if extra else "attachment" + disposition = ( + extra.get("disposition", "attachment") if extra else "attachment" + ) return { "actions": { @@ -164,15 +172,21 @@ def get_multipart_actions( size: int, part_size: int, expires_in: int, - extra: Optional[Dict[str, Any]] = None, - ) -> Dict[str, Any]: + extra: Optional[dict[str, Any]] = None, + ) -> dict[str, Any]: """Get actions for a multipart upload""" blocks = _calculate_blocks(size, part_size) uncommitted = self._get_uncommitted_blocks(prefix, oid, blocks) filename = extra.get("filename") if extra else None base_url = self._get_signed_url( - prefix, oid, expires_in, filename, create=True, write=True, delete=True + prefix, + oid, + expires_in, + filename, + create=True, + write=True, + delete=True, ) parts = [ self._create_part_request(base_url, b, expires_in) @@ -185,7 +199,7 @@ def get_multipart_actions( len(parts), ) commit_body = self._create_commit_body(blocks) - reply: Dict[str, Any] = { + reply: dict[str, Any] = { "actions": { "commit": { "method": "PUT", @@ -234,11 +248,13 @@ def _get_signed_url( ) -> str: blob_name = self._get_blob_path(prefix, oid) blob_permissions = BlobSasPermissions(**permissions) - token_expires = datetime.now(tz=timezone.utc) + timedelta(seconds=expires_in) + token_expires = datetime.now(tz=UTC) + timedelta(seconds=expires_in) - extra_args: Dict[str, Any] = {} + extra_args: 
dict[str, Any] = {} if filename and disposition: - extra_args["content_disposition"] = f'{disposition}; filename="{filename}"' + extra_args[ + "content_disposition" + ] = f'{disposition}; filename="{filename}"' elif disposition: extra_args["content_disposition"] = f'{disposition};"' @@ -261,11 +277,12 @@ def _get_signed_url( return blob_client.url # type: ignore def _get_uncommitted_blocks( - self, prefix: str, oid: str, blocks: List[Block] - ) -> Dict[int, int]: + self, prefix: str, oid: str, blocks: list[Block] + ) -> dict[int, int]: """Get list of uncommitted blocks from the server""" blob_client = self.blob_svc_client.get_blob_client( - container=self.container_name, blob=self._get_blob_path(prefix, oid) + container=self.container_name, + blob=self._get_blob_path(prefix, oid), ) try: committed_blocks, uncommitted_blocks = blob_client.get_block_list( @@ -283,7 +300,9 @@ def _get_uncommitted_blocks( try: # NOTE: The Azure python library already does ID base64 decoding for us, so we only case to int here - existing_blocks = {int(b["id"]): b["size"] for b in uncommitted_blocks} + existing_blocks = { + int(b["id"]): b["size"] for b in uncommitted_blocks + } except ValueError: _log.warning( "Some uncommitted blocks have unexpected ID format; restarting upload" @@ -291,12 +310,16 @@ def _get_uncommitted_blocks( return {} _log.debug( - "Found %d existing uncommitted blocks on server", len(existing_blocks) + "Found %d existing uncommitted blocks on server", + len(existing_blocks), ) # Verify that existing blocks are the same as what we plan to upload for block in blocks: - if block.id in existing_blocks and existing_blocks[block.id] != block.size: + if ( + block.id in existing_blocks + and existing_blocks[block.id] != block.size + ): _log.warning( "Uncommitted block size does not match our plan, restating upload" ) @@ -307,7 +330,7 @@ def _get_uncommitted_blocks( def _create_part_request( self, base_url: str, block: Block, expires_in: int - ) -> Dict[str, Any]: + ) -> 
dict[str, Any]: """Create the part request object for a block""" block_id = self._encode_block_id(block.id) part = { @@ -322,7 +345,7 @@ def _create_part_request( return part - def _create_commit_body(self, blocks: List[Block]) -> str: + def _create_commit_body(self, blocks: list[Block]) -> str: """Create the body for a 'Put Blocks' request we use in commit NOTE: This is a simple XML construct, so we don't import / depend on XML construction API @@ -347,7 +370,7 @@ def _encode_block_id(cls, b_id: int) -> str: ).decode("ascii") -def _calculate_blocks(file_size: int, part_size: int) -> List[Block]: +def _calculate_blocks(file_size: int, part_size: int) -> list[Block]: """Calculate the list of blocks in a blob >>> _calculate_blocks(30, 10) @@ -365,12 +388,17 @@ def _calculate_blocks(file_size: int, part_size: int) -> List[Block]: full_blocks = file_size // part_size last_block_size = file_size % part_size blocks = [ - Block(id=i, start=i * part_size, size=part_size) for i in range(full_blocks) + Block(id=i, start=i * part_size, size=part_size) + for i in range(full_blocks) ] if last_block_size: blocks.append( - Block(id=full_blocks, start=full_blocks * part_size, size=last_block_size) + Block( + id=full_blocks, + start=full_blocks * part_size, + size=last_block_size, + ) ) return blocks diff --git a/giftless/storage/google_cloud.py b/giftless/storage/google_cloud.py index 069eb0b..939db43 100644 --- a/giftless/storage/google_cloud.py +++ b/giftless/storage/google_cloud.py @@ -3,7 +3,7 @@ import json import posixpath from datetime import timedelta -from typing import Any, BinaryIO, Dict, Optional, Union +from typing import Any, BinaryIO, Optional, Union import google.auth from google.auth import impersonated_credentials @@ -33,7 +33,10 @@ def __init__( self.bucket_name = bucket_name self.path_prefix = path_prefix self.credentials: Optional[ - Union[service_account.Credentials, impersonated_credentials.Credentials] + Union[ + service_account.Credentials, + 
impersonated_credentials.Credentials, + ] ] = self._load_credentials(account_key_file, account_key_base64) self.storage_client = storage.Client( project=project_name, credentials=self.credentials @@ -80,8 +83,8 @@ def get_upload_action( oid: str, size: int, expires_in: int, - extra: Optional[Dict[str, Any]] = None, - ) -> Dict[str, Any]: + extra: Optional[dict[str, Any]] = None, + ) -> dict[str, Any]: return { "actions": { "upload": { @@ -100,10 +103,12 @@ def get_download_action( oid: str, size: int, expires_in: int, - extra: Optional[Dict[str, Any]] = None, - ) -> Dict[str, Any]: + extra: Optional[dict[str, Any]] = None, + ) -> dict[str, Any]: filename = extra.get("filename") if extra else None - disposition = extra.get("disposition", "attachment") if extra else "attachment" + disposition = ( + extra.get("disposition", "attachment") if extra else "attachment" + ) return { "actions": { @@ -173,7 +178,9 @@ def _load_credentials( ) elif account_key_base64: account_info = json.loads(base64.b64decode(account_key_base64)) - return service_account.Credentials.from_service_account_info(account_info) + return service_account.Credentials.from_service_account_info( + account_info + ) else: return None # Will use Workload Identity if available diff --git a/giftless/storage/local_storage.py b/giftless/storage/local_storage.py index 5b0ec82..06c92d5 100644 --- a/giftless/storage/local_storage.py +++ b/giftless/storage/local_storage.py @@ -1,6 +1,6 @@ import os import shutil -from typing import Any, BinaryIO, Dict, Optional +from typing import Any, BinaryIO, Optional from giftless.storage import MultipartStorage, StreamingStorage, exc from giftless.view import ViewProvider @@ -55,8 +55,8 @@ def get_multipart_actions( size: int, part_size: int, expires_in: int, - extra: Optional[Dict[str, Any]] = None, - ) -> Dict[str, Any]: + extra: Optional[dict[str, Any]] = None, + ) -> dict[str, Any]: return {} def get_download_action( @@ -65,8 +65,8 @@ def get_download_action( oid: str, 
size: int, expires_in: int, - extra: Optional[Dict[str, Any]] = None, - ) -> Dict[str, Any]: + extra: Optional[dict[str, Any]] = None, + ) -> dict[str, Any]: return {} def register_views(self, app): diff --git a/giftless/transfer/__init__.py b/giftless/transfer/__init__.py index 5e37925..8533d16 100644 --- a/giftless/transfer/__init__.py +++ b/giftless/transfer/__init__.py @@ -4,14 +4,15 @@ for more information about what transfer APIs do in Git LFS. """ from abc import ABC +from collections.abc import Callable from functools import partial -from typing import Any, Callable, Dict, List, Optional, Set, Tuple +from typing import Any, Optional from giftless.auth import Authentication, authentication from giftless.util import add_query_params, get_callable from giftless.view import ViewProvider -_registered_adapters: Dict[str, "TransferAdapter"] = {} +_registered_adapters: dict[str, "TransferAdapter"] = {} class TransferAdapter(ABC): @@ -23,9 +24,11 @@ def upload( repo: str, oid: str, size: int, - extra: Optional[Dict[str, Any]] = None, - ) -> Dict: - raise NotImplementedError("This transfer adapter is not fully implemented") + extra: Optional[dict[str, Any]] = None, + ) -> dict: + raise NotImplementedError( + "This transfer adapter is not fully implemented" + ) def download( self, @@ -33,15 +36,19 @@ def download( repo: str, oid: str, size: int, - extra: Optional[Dict[str, Any]] = None, - ) -> Dict: - raise NotImplementedError("This transfer adapter is not fully implemented") + extra: Optional[dict[str, Any]] = None, + ) -> dict: + raise NotImplementedError( + "This transfer adapter is not fully implemented" + ) def get_action( self, name: str, organization: str, repo: str - ) -> Callable[[str, int], Dict]: + ) -> Callable[[str, int], dict]: """Shortcut for quickly getting an action callable for transfer adapter objects""" - return partial(getattr(self, name), organization=organization, repo=repo) + return partial( + getattr(self, name), organization=organization, 
repo=repo + ) class PreAuthorizingTransferAdapter(TransferAdapter, ABC): @@ -60,7 +67,7 @@ def _preauth_url( original_url: str, org: str, repo: str, - actions: Optional[Set[str]] = None, + actions: Optional[set[str]] = None, oid: Optional[str] = None, lifetime: Optional[int] = None, ) -> str: @@ -81,10 +88,10 @@ def _preauth_headers( self, org: str, repo: str, - actions: Optional[Set[str]] = None, + actions: Optional[set[str]] = None, oid: Optional[str] = None, lifetime: Optional[int] = None, - ) -> Dict[str, str]: + ) -> dict[str, str]: if not (self._auth_module and self._auth_module.preauth_handler): return {} @@ -120,14 +127,16 @@ def register_adapter(key: str, adapter: TransferAdapter): _registered_adapters[key] = adapter -def match_transfer_adapter(transfers: List[str]) -> Tuple[str, TransferAdapter]: +def match_transfer_adapter( + transfers: list[str], +) -> tuple[str, TransferAdapter]: for t in transfers: if t in _registered_adapters: return t, _registered_adapters[t] - raise ValueError("Unable to match any transfer adapter: {}".format(transfers)) + raise ValueError(f"Unable to match any transfer adapter: {transfers}") -def _init_adapter(config: Dict) -> TransferAdapter: +def _init_adapter(config: dict) -> TransferAdapter: """Call adapter factory to create a transfer adapter instance""" factory: Callable[..., TransferAdapter] = get_callable(config["factory"]) adapter: TransferAdapter = factory(**config.get("options", {})) diff --git a/giftless/transfer/basic_external.py b/giftless/transfer/basic_external.py index 748b057..87c26a2 100644 --- a/giftless/transfer/basic_external.py +++ b/giftless/transfer/basic_external.py @@ -12,7 +12,7 @@ """ import posixpath -from typing import Any, Dict, Optional +from typing import Any, Optional from giftless.storage import ExternalStorage, exc from giftless.transfer import PreAuthorizingTransferAdapter, ViewProvider @@ -20,7 +20,9 @@ from giftless.util import get_callable -class 
BasicExternalBackendTransferAdapter(PreAuthorizingTransferAdapter, ViewProvider): +class BasicExternalBackendTransferAdapter( + PreAuthorizingTransferAdapter, ViewProvider +): def __init__(self, storage: ExternalStorage, default_action_lifetime: int): self.storage = storage self.action_lifetime = default_action_lifetime @@ -31,8 +33,8 @@ def upload( repo: str, oid: str, size: int, - extra: Optional[Dict[str, Any]] = None, - ) -> Dict: + extra: Optional[dict[str, Any]] = None, + ) -> dict: prefix = posixpath.join(organization, repo) response = {"oid": oid, "size": size} @@ -68,8 +70,8 @@ def download( repo: str, oid: str, size: int, - extra: Optional[Dict[str, Any]] = None, - ) -> Dict: + extra: Optional[dict[str, Any]] = None, + ) -> dict: prefix = posixpath.join(organization, repo) response = {"oid": oid, "size": size} diff --git a/giftless/transfer/basic_streaming.py b/giftless/transfer/basic_streaming.py index 624da0c..75c2f71 100644 --- a/giftless/transfer/basic_streaming.py +++ b/giftless/transfer/basic_streaming.py @@ -7,7 +7,7 @@ """ import posixpath -from typing import Any, Dict, Optional +from typing import Any, Optional import marshmallow from flask import Response, request, url_for @@ -45,8 +45,12 @@ def verify(self, organization, repo): ) prefix = posixpath.join(organization, repo) - if not self.storage.verify_object(prefix, payload["oid"], payload["size"]): - raise InvalidPayload("Object does not exist or size does not match") + if not self.storage.verify_object( + prefix, payload["oid"], payload["size"] + ): + raise InvalidPayload( + "Object does not exist or size does not match" + ) return Response(status=200) @classmethod @@ -56,7 +60,11 @@ def get_verify_url( """Get the URL for upload / download requests for this object""" op_name = f"{cls.__name__}:verify" url: str = url_for( - op_name, organization=organization, repo=repo, oid=oid, _external=True + op_name, + organization=organization, + repo=repo, + oid=oid, + _external=True, ) return url @@ 
-75,9 +83,13 @@ def put(self, organization, repo, oid): into the WSGI Server -> Werkzeug -> Flask stack, and it may also depend on specific WSGI server implementation and even how a proxy (e.g. nginx) is configured. """ - self._check_authorization(organization, repo, Permission.WRITE, oid=oid) + self._check_authorization( + organization, repo, Permission.WRITE, oid=oid + ) stream = request.stream - self.storage.put(prefix=f"{organization}/{repo}", oid=oid, data_stream=stream) + self.storage.put( + prefix=f"{organization}/{repo}", oid=oid, data_stream=stream + ) return Response(status=200) def get(self, organization, repo, oid): @@ -91,7 +103,9 @@ def get(self, organization, repo, oid): headers = {} if filename and disposition: - headers = {"Content-Disposition": f'attachment; filename="{filename}"'} + headers = { + "Content-Disposition": f'attachment; filename="{filename}"' + } elif disposition: headers = {"Content-Disposition": disposition} @@ -99,23 +113,35 @@ def get(self, organization, repo, oid): file = self.storage.get(path, oid) mime_type = self.storage.get_mime_type(path, oid) headers["Content-Type"] = mime_type - return Response(file, direct_passthrough=True, status=200, headers=headers) + return Response( + file, direct_passthrough=True, status=200, headers=headers + ) else: raise NotFound("The object was not found") @classmethod def get_storage_url( - cls, operation: str, organization: str, repo: str, oid: Optional[str] = None + cls, + operation: str, + organization: str, + repo: str, + oid: Optional[str] = None, ) -> str: """Get the URL for upload / download requests for this object""" op_name = f"{cls.__name__}:{operation}" url: str = url_for( - op_name, organization=organization, repo=repo, oid=oid, _external=True + op_name, + organization=organization, + repo=repo, + oid=oid, + _external=True, ) return url -class BasicStreamingTransferAdapter(PreAuthorizingTransferAdapter, ViewProvider): +class BasicStreamingTransferAdapter( + 
PreAuthorizingTransferAdapter, ViewProvider +): def __init__(self, storage: StreamingStorage, action_lifetime: int): self.storage = storage self.action_lifetime = action_lifetime @@ -126,8 +152,8 @@ def upload( repo: str, oid: str, size: int, - extra: Optional[Dict[str, Any]] = None, - ) -> Dict: + extra: Optional[dict[str, Any]] = None, + ) -> dict: response = {"oid": oid, "size": size} prefix = posixpath.join(organization, repo) @@ -137,7 +163,9 @@ def upload( ): response["actions"] = { "upload": { - "href": ObjectsView.get_storage_url("put", organization, repo, oid), + "href": ObjectsView.get_storage_url( + "put", organization, repo, oid + ), "header": self._preauth_headers( organization, repo, actions={"write"}, oid=oid ), @@ -165,19 +193,27 @@ def download( repo: str, oid: str, size: int, - extra: Optional[Dict[str, Any]] = None, - ) -> Dict: + extra: Optional[dict[str, Any]] = None, + ) -> dict: response = {"oid": oid, "size": size} prefix = posixpath.join(organization, repo) if not self.storage.exists(prefix, oid): - response["error"] = {"code": 404, "message": "Object does not exist"} + response["error"] = { + "code": 404, + "message": "Object does not exist", + } elif self.storage.get_size(prefix, oid) != size: - response["error"] = {"code": 422, "message": "Object size does not match"} + response["error"] = { + "code": 422, + "message": "Object size does not match", + } else: - download_url = ObjectsView.get_storage_url("get", organization, repo, oid) + download_url = ObjectsView.get_storage_url( + "get", organization, repo, oid + ) preauth_url = self._preauth_url( download_url, organization, repo, actions={"read"}, oid=oid ) @@ -205,4 +241,6 @@ def register_views(self, app): def factory(storage_class, storage_options, action_lifetime): """Factory for basic transfer adapter with local storage""" storage = get_callable(storage_class, __name__) - return BasicStreamingTransferAdapter(storage(**storage_options), action_lifetime) + return 
BasicStreamingTransferAdapter( + storage(**storage_options), action_lifetime + ) diff --git a/giftless/transfer/multipart.py b/giftless/transfer/multipart.py index 6415296..9b71769 100644 --- a/giftless/transfer/multipart.py +++ b/giftless/transfer/multipart.py @@ -2,7 +2,7 @@ """ import posixpath -from typing import Any, Dict, Optional +from typing import Any, Optional from giftless.storage import MultipartStorage, exc from giftless.transfer import PreAuthorizingTransferAdapter, ViewProvider @@ -30,8 +30,8 @@ def upload( repo: str, oid: str, size: int, - extra: Optional[Dict[str, Any]] = None, - ) -> Dict: + extra: Optional[dict[str, Any]] = None, + ) -> dict: prefix = posixpath.join(organization, repo) response = {"oid": oid, "size": size} @@ -66,8 +66,8 @@ def download( repo: str, oid: str, size: int, - extra: Optional[Dict[str, Any]] = None, - ) -> Dict: + extra: Optional[dict[str, Any]] = None, + ) -> dict: prefix = posixpath.join(organization, repo) response = {"oid": oid, "size": size} @@ -114,5 +114,7 @@ def factory( except (AttributeError, ImportError): raise ValueError(f"Unable to load storage module: {storage_class}") return MultipartTransferAdapter( - storage(**storage_options), action_lifetime, max_part_size=max_part_size + storage(**storage_options), + action_lifetime, + max_part_size=max_part_size, ) diff --git a/giftless/transfer/types.py b/giftless/transfer/types.py index 4732952..151f7be 100644 --- a/giftless/transfer/types.py +++ b/giftless/transfer/types.py @@ -1,7 +1,7 @@ """Some useful type definitions for transfer protocols """ import sys -from typing import Any, Dict, List +from typing import Any if sys.version_info >= (3, 8): from typing import TypedDict @@ -17,8 +17,8 @@ class ObjectAttributes(TypedDict): class BasicUploadActions(TypedDict, total=False): - upload: Dict[str, Any] - verify: Dict[str, Any] + upload: dict[str, Any] + verify: dict[str, Any] class UploadObjectAttributes(ObjectAttributes, total=False): @@ -26,11 +26,11 @@ class 
UploadObjectAttributes(ObjectAttributes, total=False): class MultipartUploadActions(TypedDict, total=False): - init: Dict[str, Any] - commit: Dict[str, Any] - parts: List[Dict[str, Any]] - abort: Dict[str, Any] - verify: Dict[str, Any] + init: dict[str, Any] + commit: dict[str, Any] + parts: list[dict[str, Any]] + abort: dict[str, Any] + verify: dict[str, Any] class MultipartUploadObjectAttributes(ObjectAttributes, total=False): diff --git a/giftless/util.py b/giftless/util.py index c25cc18..1ada902 100644 --- a/giftless/util.py +++ b/giftless/util.py @@ -1,11 +1,14 @@ """Miscellanea """ import importlib -from typing import Any, Callable, Dict, Iterable, Optional +from collections.abc import Callable, Iterable +from typing import Any, Optional from urllib.parse import urlencode -def get_callable(callable_str: str, base_package: Optional[str] = None) -> Callable: +def get_callable( + callable_str: str, base_package: Optional[str] = None +) -> Callable: """Get a callable function / class constructor from a string of the form `package.subpackage.module:callable` @@ -57,7 +60,7 @@ def to_iterable(val: Any) -> Iterable: return (val,) -def add_query_params(url: str, params: Dict[str, Any]) -> str: +def add_query_params(url: str, params: dict[str, Any]) -> str: """Safely add query params to a url that may or may not already contain query params. @@ -82,5 +85,7 @@ def safe_filename(original_filename: str) -> str: >>> safe_filename("_ex@mple 2%.old.xlsx") '_exmple2.old.xlsx' """ - valid_chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_." + valid_chars = ( + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_." 
+ ) return "".join(c for c in original_filename if c in valid_chars) diff --git a/giftless/view.py b/giftless/view.py index 013a4f9..8bb0d9f 100644 --- a/giftless/view.py +++ b/giftless/view.py @@ -1,6 +1,6 @@ """Flask-Classful View Classes """ -from typing import Any, Dict, Optional +from typing import Any, Optional from flask_classful import FlaskView from webargs.flaskparser import parser # type: ignore @@ -35,13 +35,17 @@ def register(cls, *args, **kwargs): def _check_authorization(cls, organization, repo, permission, oid=None): """Check the current user is authorized to perform an action and raise an exception otherwise""" if not cls._is_authorized(organization, repo, permission, oid): - raise exc.Forbidden("Your are not authorized to perform this action") + raise exc.Forbidden( + "Your are not authorized to perform this action" + ) @staticmethod def _is_authorized(organization, repo, permission, oid=None): """Check the current user is authorized to perform an action""" identity = authn.get_identity() - return identity and identity.is_authorized(organization, repo, permission, oid) + return identity and identity.is_authorized( + organization, repo, permission, oid + ) class BatchView(BaseView): @@ -76,14 +80,18 @@ def post(self, organization, repo): raise response = {"transfer": transfer_type} - action = adapter.get_action(payload["operation"].value, organization, repo) + action = adapter.get_action( + payload["operation"].value, organization, repo + ) response["objects"] = [action(**o) for o in payload["objects"]] if all(self._is_error(o, 404) for o in response["objects"]): raise exc.NotFound("Cannot find any of the requested objects") if all(self._is_error(o) for o in response["objects"]): - raise exc.InvalidPayload("Cannot validate any of the requested objects") + raise exc.InvalidPayload( + "Cannot validate any of the requested objects" + ) # TODO: Check Accept header # TODO: do we need an output schema? 
@@ -91,7 +99,7 @@ def post(self, organization, repo): return response @staticmethod - def _is_error(obj: Dict[str, Any], code: Optional[int] = None): + def _is_error(obj: dict[str, Any], code: Optional[int] = None): try: return obj["error"]["code"] == code or code is None except KeyError: diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..ce5e7ea --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,270 @@ +[project] +# https://packaging.python.org/en/latest/specifications/declaring-project-metadata/ +name = "giftless" +description = "A Git LFS Server implementation in Python with support for pluggable backends" +license = {file = "LICENSE"} +readme = "README.md" +keywords = [ + "git", + "git-lfs", + "datopian", +] +# https://pypi.org/classifiers/ +classifiers = [ + "Development Status :: 5 - Production/Stable", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.11", + "Natural Language :: English", + "Operating System :: POSIX", + "Typing :: Typed", +] +requires-python = ">=3.11" +dependencies = [] +dynamic = ["version"] + +[[project.authors]] +name="Shahar Evron" +email="shahar.evron@datopian.com" + +[[project.authors]] +name="Rufus Pollock" +email="hello@rufuspollock.com" + +[[project.authors]] +name="Adam Thornton" +email="athornton@lsst.org" + +[project.urls] +Homepage = "https://giftless.datopian.com" +Source = "https://github.com/datopian/giftless" +"Issue tracker" = "https://github.com/datopian/giftless/issues" + +[build-system] +requires = [ + "setuptools>=61", + "wheel", + "setuptools_scm[toml]>=6.2", +] +build-backend = "setuptools.build_meta" + +[tool.setuptools_scm] + +[tool.black] +line-length = 79 +target-version = ["py311"] +exclude = ''' +/( + \.eggs + | \.git + | \.mypy_cache + | \.tox + | \.venv + | _build + | build + | dist +)/ +''' +# Use single-quoted strings so TOML treats the string like a Python r-string +#
Multi-line strings are implicitly treated by black as regular expressions + +[tool.coverage.run] +parallel = true +branch = true +source = ["giftless"] + +[tool.coverage.paths] +source = ["giftless", ".tox/*/site-packages"] + +[tool.coverage.report] +show_missing = true +exclude_lines = [ + "pragma: no cover", + "def __repr__", + "if self.debug:", + "if settings.DEBUG", + "raise AssertionError", + "raise NotImplementedError", + "if 0:", + "if __name__ == .__main__.:", + "if TYPE_CHECKING:" +] + +[tool.mypy] +disallow_untyped_defs = true +disallow_incomplete_defs = true +ignore_missing_imports = true +local_partial_types = true +no_implicit_reexport = true +plugins = [ + "pydantic.mypy", + "sqlalchemy.ext.mypy.plugin", +] +show_error_codes = true +strict_equality = true +warn_redundant_casts = true +warn_unreachable = true +warn_unused_ignores = true + +[tool.pydantic-mypy] +init_forbid_extra = true +init_typed = true +warn_required_dynamic_aliases = true +warn_untyped_fields = true + +[tool.pytest.ini_options] +asyncio_mode = "strict" +filterwarnings = [ + # Google modules call a deprecated pkg_resources API. + "ignore:pkg_resources is deprecated as an API:DeprecationWarning", + "ignore:.*pkg_resources\\.declare_namespace:DeprecationWarning", + # Bug in kopf + "ignore:.*require all values to be sortable:DeprecationWarning:kopf.*", +] +# The python_files setting is not for test detection (pytest will pick up any +# test files named *_test.py without this setting) but to enable special +# assert processing in any non-test supporting files under tests. We +# conventionally put test support functions under tests.support and may +# sometimes use assert in test fixtures in conftest.py, and pytest only +# enables magical assert processing (showing a full diff on assert failures +# with complex data structures rather than only the assert message) in files +# listed in python_files. 
+python_files = [ + "tests/*.py", + "tests/*/*.py" +] + +# The rule used with Ruff configuration is to disable every lint that has +# legitimate exceptions that are not dodgy code, rather than cluttering code +# with noqa markers. This is therefore a relatively relaxed configuration that +# errs on the side of disabling legitimate lints. +# +# Reference for settings: https://docs.astral.sh/ruff/settings/ +# Reference for rules: https://docs.astral.sh/ruff/rules/ +[tool.ruff] +exclude = [ + "docs/conf.py", +] +line-length = 79 +ignore = [ + "ANN101", # self should not have a type annotation + "ANN102", # cls should not have a type annotation + "ANN401", # sometimes Any is the right type + "ARG001", # unused function arguments are often legitimate + "ARG002", # unused method arguments are often legitimate + "ARG005", # unused lambda arguments are often legitimate + "BLE001", # we want to catch and report Exception in background tasks + "C414", # nested sorted is how you sort by multiple keys with reverse + "COM812", # omitting trailing commas allows black autoreformatting + "D102", # sometimes we use docstring inheritance + "D104", # don't see the point of documenting every package + "D105", # our style doesn't require docstrings for magic methods + "D106", # Pydantic uses a nested Config class that doesn't warrant docs + "D205", # our documentation style allows a folded first line + "EM101", # justification (duplicate string in traceback) is silly + "EM102", # justification (duplicate string in traceback) is silly + "FBT003", # positional booleans are normal for Pydantic field defaults + "FIX002", # point of a TODO comment is that we're not ready to fix it + "G004", # forbidding logging f-strings is appealing, but not our style + "RET505", # disagree that omitting else always makes code more readable + "PLR0911", # often many returns is clearer and simpler style + "PLR0913", # factory pattern uses constructors with many arguments + "PLR2004", # too aggressive about
magic values + "PLW0603", # yes global is discouraged but if needed, it's needed + "S105", # good idea but too many false positives on non-passwords + "S106", # good idea but too many false positives on non-passwords + "S107", # good idea but too many false positives on non-passwords + "S603", # not going to manually mark every subprocess call as reviewed + "S607", # using PATH is not a security vulnerability + "SIM102", # sometimes the formatting of nested if statements is clearer + "SIM117", # sometimes nested with contexts are clearer + "TCH001", # we decided to not maintain separate TYPE_CHECKING blocks + "TCH002", # we decided to not maintain separate TYPE_CHECKING blocks + "TCH003", # we decided to not maintain separate TYPE_CHECKING blocks + "TD003", # we don't require issues be created for TODOs + "TID252", # if we're going to use relative imports, use them always + "TRY003", # good general advice but lint is way too aggressive + "TRY301", # sometimes raising exceptions inside try is the best flow + + # The following settings should be disabled when using ruff format + # per https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules + "W191", + "E111", + "E114", + "E117", + "D206", + "D300", + "Q000", + "Q001", + "Q002", + "Q003", + "COM812", + "COM819", + "ISC001", + "ISC002", +] +select = ["ALL"] +target-version = "py311" + +[tool.ruff.per-file-ignores] +"tests/**" = [ + "C901", # tests are allowed to be complex, sometimes that's convenient + "D101", # tests don't need docstrings + "D103", # tests don't need docstrings + "PLR0915", # tests are allowed to be long, sometimes that's convenient + "PT012", # way too aggressive about limiting pytest.raises blocks + "S101", # tests should use assert + "S106", # tests are allowed to hard-code dummy passwords + "SLF001", # tests are allowed to access private members +] + +[tool.ruff.isort] +known-first-party = ["giftless", "tests"] +split-on-trailing-comma = false + +[tool.ruff.flake8-bugbear] 
+extend-immutable-calls = [ + "fastapi.Form", + "fastapi.Header", + "fastapi.Depends", + "fastapi.Path", + "fastapi.Query", +] + +# These are too useful as attributes or methods to allow the conflict with the +# built-in to rule out their use. +[tool.ruff.flake8-builtins] +builtins-ignorelist = [ + "all", + "any", + "dict", + "help", + "id", + "list", + "open", + "type", +] + +[tool.ruff.flake8-pytest-style] +fixture-parentheses = false +mark-parentheses = false + +[tool.ruff.mccabe] +max-complexity = 11 + +[tool.ruff.pydocstyle] +convention = "numpy" + +[tool.scriv] +categories = [ + "Backwards-incompatible changes", + "New features", + "Bug fixes", + "Other changes", +] +entry_title_template = "{{ version }} ({{ date.strftime('%Y-%m-%d') }})" +format = "md" +md_header_level = "2" +new_fragment_template = "file:changelog.d/_template.md.jinja" +skip_fragments = "_template.md.jinja" diff --git a/scripts/docker-tag.sh b/scripts/docker-tag.sh new file mode 100755 index 0000000..080a9c9 --- /dev/null +++ b/scripts/docker-tag.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +# Determine the tag for Docker images based on GitHub Actions environment +# variables. 
+ +set -eo pipefail + +if [ -n "$GITHUB_HEAD_REF" ]; then + # For pull requests + echo ${GITHUB_HEAD_REF} | sed -E 's,/,-,g' +else + # For push events + echo ${GITHUB_REF} | sed -E 's,refs/(heads|tags)/,,' | sed -E 's,/,-,g' +fi diff --git a/tests/auth/test_auth.py b/tests/auth/test_auth.py index b8b7948..5842090 100644 --- a/tests/auth/test_auth.py +++ b/tests/auth/test_auth.py @@ -8,7 +8,9 @@ def test_default_identity_properties(): """Test the basic properties of the default identity object""" - user = DefaultIdentity("arthur", "kingofthebritons", "arthur@camelot.gov.uk") + user = DefaultIdentity( + "arthur", "kingofthebritons", "arthur@camelot.gov.uk" + ) assert user.name == "arthur" assert user.id == "kingofthebritons" assert user.email == "arthur@camelot.gov.uk" @@ -17,7 +19,13 @@ def test_default_identity_properties(): @pytest.mark.parametrize( "requested", [ - ({"permission": Permission.READ, "organization": "myorg", "repo": "somerepo"}), + ( + { + "permission": Permission.READ, + "organization": "myorg", + "repo": "somerepo", + } + ), ( { "permission": Permission.READ, @@ -33,11 +41,19 @@ def test_default_identity_properties(): "oid": "foobar", } ), - ({"permission": Permission.WRITE, "organization": "myorg", "repo": "somerepo"}), + ( + { + "permission": Permission.WRITE, + "organization": "myorg", + "repo": "somerepo", + } + ), ], ) def test_default_identity_denied_by_default(requested): - user = DefaultIdentity("arthur", "kingofthebritons", "arthur@camelot.gov.uk") + user = DefaultIdentity( + "arthur", "kingofthebritons", "arthur@camelot.gov.uk" + ) assert user.is_authorized(**requested) is False @@ -88,8 +104,12 @@ def test_default_identity_denied_by_default(requested): ], ) def test_default_identity_allow_specific_repo(requested, expected): - user = DefaultIdentity("arthur", "kingofthebritons", "arthur@camelot.gov.uk") - user.allow(organization="myorg", repo="somerepo", permissions=Permission.all()) + user = DefaultIdentity( + "arthur", 
"kingofthebritons", "arthur@camelot.gov.uk" + ) + user.allow( + organization="myorg", repo="somerepo", permissions=Permission.all() + ) assert expected is user.is_authorized(**requested) @@ -147,9 +167,12 @@ def test_default_identity_allow_specific_repo(requested, expected): ], ) def test_default_identity_allow_specific_org_permissions(requested, expected): - user = DefaultIdentity("arthur", "kingofthebritons", "arthur@camelot.gov.uk") + user = DefaultIdentity( + "arthur", "kingofthebritons", "arthur@camelot.gov.uk" + ) user.allow( - organization="myorg", permissions={Permission.READ_META, Permission.READ} + organization="myorg", + permissions={Permission.READ_META, Permission.READ}, ) assert expected is user.is_authorized(**requested) @@ -158,7 +181,11 @@ def test_default_identity_allow_specific_org_permissions(requested, expected): "requested, expected", [ ( - {"organization": "myorg", "repo": "myrepo", "permission": Permission.READ}, + { + "organization": "myorg", + "repo": "myrepo", + "permission": Permission.READ, + }, True, ), ( @@ -178,7 +205,11 @@ def test_default_identity_allow_specific_org_permissions(requested, expected): True, ), ( - {"organization": "myorg", "repo": "myrepo", "permission": Permission.WRITE}, + { + "organization": "myorg", + "repo": "myrepo", + "permission": Permission.WRITE, + }, False, ), ( @@ -201,7 +232,11 @@ def test_allow_anon_read_only(requested, expected): "requested, expected", [ ( - {"organization": "myorg", "repo": "myrepo", "permission": Permission.READ}, + { + "organization": "myorg", + "repo": "myrepo", + "permission": Permission.READ, + }, True, ), ( @@ -221,7 +256,11 @@ def test_allow_anon_read_only(requested, expected): True, ), ( - {"organization": "myorg", "repo": "myrepo", "permission": Permission.WRITE}, + { + "organization": "myorg", + "repo": "myrepo", + "permission": Permission.WRITE, + }, True, ), ( diff --git a/tests/auth/test_jwt.py b/tests/auth/test_jwt.py index c512cb3..0e5cce3 100644 --- 
a/tests/auth/test_jwt.py +++ b/tests/auth/test_jwt.py @@ -15,8 +15,12 @@ JWT_HS_KEY = b"some-random-secret" # Asymmetric key files used in tests -JWT_RS_PRI_KEY = os.path.join(os.path.dirname(__file__), "data", "test-key.pem") -JWT_RS_PUB_KEY = os.path.join(os.path.dirname(__file__), "data", "test-key.pub.pem") +JWT_RS_PRI_KEY = os.path.join( + os.path.dirname(__file__), "data", "test-key.pem" +) +JWT_RS_PUB_KEY = os.path.join( + os.path.dirname(__file__), "data", "test-key.pub.pem" +) def test_jwt_can_authorize_request_symmetric_key(app): @@ -60,9 +64,9 @@ def test_jwt_can_authorize_request_token_as_basic_password(app): """Test that we can pass a JWT token as 'Basic' authorization password""" authz = JWTAuthenticator(private_key=JWT_HS_KEY, algorithm="HS256") token = _get_test_token() - auth_value = base64.b64encode(b":".join([b"_jwt", token.encode("ascii")])).decode( - "ascii" - ) + auth_value = base64.b64encode( + b":".join([b"_jwt", token.encode("ascii")]) + ).decode("ascii") with app.test_request_context( "/myorg/myrepo/objects/batch", @@ -79,9 +83,9 @@ def test_jwt_can_authorize_request_token_basic_password_disabled(app): private_key=JWT_HS_KEY, algorithm="HS256", basic_auth_user=None ) token = _get_test_token() - auth_value = base64.b64encode(b":".join([b"_jwt", token.encode("ascii")])).decode( - "ascii" - ) + auth_value = base64.b64encode( + b":".join([b"_jwt", token.encode("ascii")]) + ).decode("ascii") with app.test_request_context( "/myorg/myrepo/objects/batch", @@ -124,11 +128,15 @@ def test_jwt_pre_authorize_action(): authz = JWTAuthenticator( private_key=JWT_HS_KEY, algorithm="HS256", default_lifetime=120 ) - identity = DefaultIdentity(name="joe", email="joe@shmoe.com", id="babab0ba") - header = authz.get_authz_header(identity, "myorg", "somerepo", actions={"read"}) + identity = DefaultIdentity( + name="joe", email="joe@shmoe.com", id="babab0ba" + ) + header = authz.get_authz_header( + identity, "myorg", "somerepo", actions={"read"} + ) auth_type, 
token = header["Authorization"].split(" ") - assert "Bearer" == auth_type + assert auth_type == "Bearer" payload = jwt.decode(token, JWT_HS_KEY, algorithms="HS256") assert payload["sub"] == "babab0ba" @@ -136,7 +144,11 @@ def test_jwt_pre_authorize_action(): # Check that now() - expiration time is within 5 seconds of 120 seconds assert ( - abs((datetime.fromtimestamp(payload["exp"]) - datetime.now()).seconds - 120) < 5 + abs( + (datetime.fromtimestamp(payload["exp"]) - datetime.now()).seconds + - 120 + ) + < 5 ) @@ -144,13 +156,15 @@ def test_jwt_pre_authorize_action_custom_lifetime(): authz = JWTAuthenticator( private_key=JWT_HS_KEY, algorithm="HS256", default_lifetime=120 ) - identity = DefaultIdentity(name="joe", email="joe@shmoe.com", id="babab0ba") + identity = DefaultIdentity( + name="joe", email="joe@shmoe.com", id="babab0ba" + ) header = authz.get_authz_header( identity, "myorg", "somerepo", actions={"read"}, lifetime=3600 ) auth_type, token = header["Authorization"].split(" ") - assert "Bearer" == auth_type + assert auth_type == "Bearer" payload = jwt.decode(token, JWT_HS_KEY, algorithms="HS256") assert payload["sub"] == "babab0ba" @@ -158,7 +172,10 @@ def test_jwt_pre_authorize_action_custom_lifetime(): # Check that now() - expiration time is within 5 seconds of 3600 seconds assert ( - abs((datetime.fromtimestamp(payload["exp"]) - datetime.now()).seconds - 3600) + abs( + (datetime.fromtimestamp(payload["exp"]) - datetime.now()).seconds + - 3600 + ) < 5 ) @@ -168,47 +185,83 @@ def test_jwt_pre_authorize_action_custom_lifetime(): [ ( [], - {"organization": "myorg", "repo": "myrepo", "permission": Permission.READ}, + { + "organization": "myorg", + "repo": "myrepo", + "permission": Permission.READ, + }, False, ), ( ["blah:foo/bar:*"], - {"organization": "myorg", "repo": "myrepo", "permission": Permission.READ}, + { + "organization": "myorg", + "repo": "myrepo", + "permission": Permission.READ, + }, False, ), ( ["obj:myorg/myrepo/*"], - {"organization": 
"myorg", "repo": "myrepo", "permission": Permission.READ}, + { + "organization": "myorg", + "repo": "myrepo", + "permission": Permission.READ, + }, True, ), ( ["obj:myorg/myrepo/*"], - {"organization": "myorg", "repo": "myrepo", "permission": Permission.WRITE}, + { + "organization": "myorg", + "repo": "myrepo", + "permission": Permission.WRITE, + }, True, ), ( ["obj:myorg/otherrepo/*"], - {"organization": "myorg", "repo": "myrepo", "permission": Permission.READ}, + { + "organization": "myorg", + "repo": "myrepo", + "permission": Permission.READ, + }, False, ), ( ["obj:myorg/myrepo/*"], - {"organization": "myorg", "repo": "myrepo", "permission": Permission.READ}, + { + "organization": "myorg", + "repo": "myrepo", + "permission": Permission.READ, + }, True, ), ( ["obj:myorg/myrepo/*:read"], - {"organization": "myorg", "repo": "myrepo", "permission": Permission.WRITE}, + { + "organization": "myorg", + "repo": "myrepo", + "permission": Permission.WRITE, + }, False, ), ( ["obj:myorg/myrepo/*:write"], - {"organization": "myorg", "repo": "myrepo", "permission": Permission.WRITE}, + { + "organization": "myorg", + "repo": "myrepo", + "permission": Permission.WRITE, + }, True, ), ( ["obj:myorg/myrepo/*:read,write"], - {"organization": "myorg", "repo": "myrepo", "permission": Permission.WRITE}, + { + "organization": "myorg", + "repo": "myrepo", + "permission": Permission.WRITE, + }, True, ), ( @@ -240,7 +293,11 @@ def test_jwt_pre_authorize_action_custom_lifetime(): ), ( ["obj:myorg/myrepo/*:meta:read,write,verify"], - {"organization": "myorg", "repo": "myrepo", "permission": Permission.READ}, + { + "organization": "myorg", + "repo": "myrepo", + "permission": Permission.READ, + }, False, ), ( @@ -254,7 +311,11 @@ def test_jwt_pre_authorize_action_custom_lifetime(): ), ( "obj:myorg/*/*:read", - {"organization": "myorg", "repo": "myrepo", "permission": Permission.READ}, + { + "organization": "myorg", + "repo": "myrepo", + "permission": Permission.READ, + }, True, ), ( @@ 
-268,12 +329,20 @@ def test_jwt_pre_authorize_action_custom_lifetime(): ), ( "obj:myorg/*/*:read", - {"organization": "myorg", "repo": "myrepo", "permission": Permission.WRITE}, + { + "organization": "myorg", + "repo": "myrepo", + "permission": Permission.WRITE, + }, False, ), ( "obj:*/*/*:read", - {"organization": "myorg", "repo": "myrepo", "permission": Permission.READ}, + { + "organization": "myorg", + "repo": "myrepo", + "permission": Permission.READ, + }, True, ), ( @@ -493,6 +562,6 @@ def _get_test_token(lifetime=300, headers=None, algo="HS256", **kwargs): with open(JWT_RS_PRI_KEY) as f: key = f.read() else: - raise ValueError("Don't know how to test algo: {}".format(algo)) + raise ValueError(f"Don't know how to test algo: {algo}") return jwt.encode(payload, key, algorithm=algo, headers=headers) diff --git a/tests/conftest.py b/tests/conftest.py index 16cf975..0d12fdb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,7 +7,7 @@ from giftless.auth import allow_anon, authentication -@pytest.fixture() +@pytest.fixture def storage_path(tmp_path): path = tmp_path / "lfs-tests" path.mkdir() @@ -17,14 +17,16 @@ def storage_path(tmp_path): shutil.rmtree(path) -@pytest.fixture() +@pytest.fixture def app(storage_path): """Session fixture to configure the Flask app""" app = init_app( additional_config={ "TESTING": True, "TRANSFER_ADAPTERS": { - "basic": {"options": {"storage_options": {"path": storage_path}}} + "basic": { + "options": {"storage_options": {"path": storage_path}} + } }, } ) @@ -32,7 +34,7 @@ def app(storage_path): return app -@pytest.fixture() +@pytest.fixture def app_context(app): ctx = app.app_context() try: @@ -42,13 +44,13 @@ def app_context(app): ctx.pop() -@pytest.fixture() +@pytest.fixture def test_client(app_context: AppContext): test_client = app_context.app.test_client() return test_client -@pytest.fixture() +@pytest.fixture def authz_full_access( app_context, ): # needed to ensure we call init_authenticators before app context is 
destroyed diff --git a/tests/storage/__init__.py b/tests/storage/__init__.py index 122261f..77cfca4 100644 --- a/tests/storage/__init__.py +++ b/tests/storage/__init__.py @@ -5,7 +5,9 @@ from giftless.storage import ExternalStorage, StreamingStorage from giftless.storage.exc import ObjectNotFound -ARBITRARY_OID = "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824" +ARBITRARY_OID = ( + "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824" +) class _CommonStorageAbstractTests: @@ -18,7 +20,9 @@ def test_get_size(self, storage_backend): """Test getting the size of a stored object""" content = b"The contents of a file-like object" storage_backend.put("org/repo", ARBITRARY_OID, io.BytesIO(content)) - assert len(content) == storage_backend.get_size("org/repo", ARBITRARY_OID) + assert len(content) == storage_backend.get_size( + "org/repo", ARBITRARY_OID + ) def test_get_size_not_existing(self, storage_backend): """Test getting the size of a non-existing object raises an exception""" @@ -45,7 +49,9 @@ class _VerifiableStorageAbstractTests: def test_verify_object_ok(self, storage_backend): content = b"The contents of a file-like object" storage_backend.put("org/repo", ARBITRARY_OID, io.BytesIO(content)) - assert storage_backend.verify_object("org/repo", ARBITRARY_OID, len(content)) + assert storage_backend.verify_object( + "org/repo", ARBITRARY_OID, len(content) + ) def test_verify_object_wrong_size(self, storage_backend): content = b"The contents of a file-like object" @@ -70,7 +76,9 @@ class StreamingStorageAbstractTests( def test_put_get_object(self, storage_backend: StreamingStorage): """Test a full put-then-get cycle""" content = b"The contents of a file-like object" - written = storage_backend.put("org/repo", ARBITRARY_OID, io.BytesIO(content)) + written = storage_backend.put( + "org/repo", ARBITRARY_OID, io.BytesIO(content) + ) assert len(content) == written diff --git a/tests/storage/test_amazon_s3.py b/tests/storage/test_amazon_s3.py 
index 70480ae..d9ead9c 100644 --- a/tests/storage/test_amazon_s3.py +++ b/tests/storage/test_amazon_s3.py @@ -3,19 +3,23 @@ import os from base64 import b64decode from binascii import unhexlify -from typing import Generator +from collections.abc import Generator import pytest from giftless.storage import ExternalStorage from giftless.storage.amazon_s3 import AmazonS3Storage -from . import ARBITRARY_OID, ExternalStorageAbstractTests, StreamingStorageAbstractTests +from . import ( + ARBITRARY_OID, + ExternalStorageAbstractTests, + StreamingStorageAbstractTests, +) TEST_AWS_S3_BUCKET_NAME = "test-giftless" -@pytest.fixture() +@pytest.fixture def storage_backend() -> Generator[AmazonS3Storage, None, None]: """Provide a S3 Storage backend for all AWS S3 tests @@ -32,7 +36,9 @@ def storage_backend() -> Generator[AmazonS3Storage, None, None]: prefix = "giftless-tests" # We use a live S3 bucket to test - storage = AmazonS3Storage(bucket_name=TEST_AWS_S3_BUCKET_NAME, path_prefix=prefix) + storage = AmazonS3Storage( + bucket_name=TEST_AWS_S3_BUCKET_NAME, path_prefix=prefix + ) try: yield storage finally: @@ -40,13 +46,14 @@ def storage_backend() -> Generator[AmazonS3Storage, None, None]: try: bucket.objects.all().delete() except Exception as e: - raise pytest.PytestWarning("Could not clean up after test: {}".format(e)) + raise pytest.PytestWarning(f"Could not clean up after test: {e}") @pytest.fixture(scope="module") def vcr_config(): live_tests = bool( - os.environ.get("AWS_ACCESS_KEY_ID") and os.environ.get("AWS_SECRET_ACCESS_KEY") + os.environ.get("AWS_ACCESS_KEY_ID") + and os.environ.get("AWS_SECRET_ACCESS_KEY") ) if live_tests: mode = "once" @@ -61,7 +68,7 @@ def vcr_config(): } -@pytest.mark.vcr() +@pytest.mark.vcr class TestAmazonS3StorageBackend( StreamingStorageAbstractTests, ExternalStorageAbstractTests ): diff --git a/tests/storage/test_azure.py b/tests/storage/test_azure.py index a1fcfad..6b91813 100644 --- a/tests/storage/test_azure.py +++ 
b/tests/storage/test_azure.py @@ -1,7 +1,7 @@ """Tests for the Azure storage backend """ import os -from typing import Generator +from collections.abc import Generator import pytest from azure.core.exceptions import AzureError # type: ignore @@ -15,7 +15,7 @@ MOCK_AZURE_CONTAINER_NAME = "my-container" -@pytest.fixture() +@pytest.fixture def storage_backend() -> Generator[AzureBlobsStorage, None, None]: """Provide an Azure Blob Storage backend for all Azure tests @@ -34,7 +34,9 @@ def storage_backend() -> Generator[AzureBlobsStorage, None, None]: connection_str ) try: - yield AzureBlobsStorage(connection_str, container_name, path_prefix=prefix) + yield AzureBlobsStorage( + connection_str, container_name, path_prefix=prefix + ) finally: container = client.get_container_client(container_name) try: @@ -55,7 +57,8 @@ def storage_backend() -> Generator[AzureBlobsStorage, None, None]: @pytest.fixture(scope="module") def vcr_config(): live_tests = bool( - os.environ.get("AZURE_CONNECTION_STRING") and os.environ.get("AZURE_CONTAINER") + os.environ.get("AZURE_CONNECTION_STRING") + and os.environ.get("AZURE_CONTAINER") ) if live_tests: mode = "once" @@ -67,7 +70,7 @@ def vcr_config(): } -@pytest.mark.vcr() +@pytest.mark.vcr class TestAzureBlobStorageBackend( StreamingStorageAbstractTests, ExternalStorageAbstractTests ): diff --git a/tests/storage/test_google_cloud.py b/tests/storage/test_google_cloud.py index cd78e20..01f745c 100644 --- a/tests/storage/test_google_cloud.py +++ b/tests/storage/test_google_cloud.py @@ -1,14 +1,13 @@ """Tests for the Google Cloud Storage storage backend """ import os -from typing import Generator +from collections.abc import Generator import pytest from google.api_core.exceptions import GoogleAPIError # type: ignore from giftless.storage.google_cloud import GoogleCloudStorage - MOCK_GCP_PROJECT_NAME = "giftless-tests" MOCK_GCP_BUCKET_NAME = "giftless-tests-20200818" @@ -56,7 +55,7 @@ ) -@pytest.fixture() +@pytest.fixture def storage_backend() -> 
Generator[GoogleCloudStorage, None, None]: """Provide a Google Cloud Storage backend for all GCS tests @@ -89,7 +88,7 @@ def storage_backend() -> Generator[GoogleCloudStorage, None, None]: bucket.delete_blobs(blobs) except GoogleAPIError as e: raise pytest.PytestWarning( - "Could not clean up after test: {}".format(e) + f"Could not clean up after test: {e}" ) else: yield GoogleCloudStorage( diff --git a/tests/storage/test_local.py b/tests/storage/test_local.py index a73e75a..02cb4a6 100644 --- a/tests/storage/test_local.py +++ b/tests/storage/test_local.py @@ -3,7 +3,7 @@ import os import pathlib import shutil -from typing import Generator +from collections.abc import Generator import pytest @@ -12,7 +12,7 @@ from . import StreamingStorageAbstractTests -@pytest.fixture() +@pytest.fixture def storage_dir(tmp_path) -> Generator[pathlib.Path, None, None]: """Create a unique temp dir for testing storage""" dir = None @@ -25,7 +25,7 @@ def storage_dir(tmp_path) -> Generator[pathlib.Path, None, None]: shutil.rmtree(dir) -@pytest.fixture() +@pytest.fixture def storage_backend(storage_dir) -> LocalStorage: """Provide a local storage backend for all local tests""" return LocalStorage(path=storage_dir) diff --git a/tests/test_batch_api.py b/tests/test_batch_api.py index 9b37fe3..8350cca 100644 --- a/tests/test_batch_api.py +++ b/tests/test_batch_api.py @@ -9,10 +9,12 @@ def test_upload_batch_request(test_client): """Test basic batch API with a basic successful upload request""" request_payload = batch_request_payload(operation="upload") - response = test_client.post("/myorg/myrepo/objects/batch", json=request_payload) + response = test_client.post( + "/myorg/myrepo/objects/batch", json=request_payload + ) - assert 200 == response.status_code - assert "application/vnd.git-lfs+json" == response.content_type + assert response.status_code == 200 + assert response.content_type == "application/vnd.git-lfs+json" payload = response.json assert "message" not in payload @@ -33,10 
+35,12 @@ def test_download_batch_request(test_client, storage_path): oid = request_payload["objects"][0]["oid"] create_file_in_storage(storage_path, "myorg", "myrepo", oid, size=8) - response = test_client.post("/myorg/myrepo/objects/batch", json=request_payload) + response = test_client.post( + "/myorg/myrepo/objects/batch", json=request_payload + ) - assert 200 == response.status_code - assert "application/vnd.git-lfs+json" == response.content_type + assert response.status_code == 200 + assert response.content_type == "application/vnd.git-lfs+json" payload = response.json assert "message" not in payload @@ -50,7 +54,9 @@ def test_download_batch_request(test_client, storage_path): assert "download" in object["actions"] -def test_download_batch_request_two_files_one_missing(test_client, storage_path): +def test_download_batch_request_two_files_one_missing( + test_client, storage_path +): """Test batch API with a two object download request where one file 404""" request_payload = batch_request_payload(operation="download") oid = request_payload["objects"][0]["oid"] @@ -59,10 +65,12 @@ def test_download_batch_request_two_files_one_missing(test_client, storage_path) # Add a 2nd, non existing object request_payload["objects"].append({"oid": "12345679", "size": 5555}) - response = test_client.post("/myorg/myrepo/objects/batch", json=request_payload) + response = test_client.post( + "/myorg/myrepo/objects/batch", json=request_payload + ) - assert 200 == response.status_code - assert "application/vnd.git-lfs+json" == response.content_type + assert response.status_code == 200 + assert response.content_type == "application/vnd.git-lfs+json" payload = response.json assert "message" not in payload @@ -87,10 +95,12 @@ def test_download_batch_request_two_files_missing(test_client): request_payload = batch_request_payload(operation="download") request_payload["objects"].append({"oid": "12345679", "size": 5555}) - response = test_client.post("/myorg/myrepo/objects/batch", 
json=request_payload) + response = test_client.post( + "/myorg/myrepo/objects/batch", json=request_payload + ) - assert 404 == response.status_code - assert "application/vnd.git-lfs+json" == response.content_type + assert response.status_code == 404 + assert response.content_type == "application/vnd.git-lfs+json" payload = response.json assert "message" in payload @@ -98,22 +108,34 @@ def test_download_batch_request_two_files_missing(test_client): assert "transfer" not in payload -def test_download_batch_request_two_files_one_mismatch(test_client, storage_path): +def test_download_batch_request_two_files_one_mismatch( + test_client, storage_path +): """Test batch API with a two object download request where one file 422""" request_payload = batch_request_payload(operation="download") request_payload["objects"].append({"oid": "12345679", "size": 8}) create_file_in_storage( - storage_path, "myorg", "myrepo", request_payload["objects"][0]["oid"], size=8 + storage_path, + "myorg", + "myrepo", + request_payload["objects"][0]["oid"], + size=8, ) create_file_in_storage( - storage_path, "myorg", "myrepo", request_payload["objects"][1]["oid"], size=9 + storage_path, + "myorg", + "myrepo", + request_payload["objects"][1]["oid"], + size=9, ) - response = test_client.post("/myorg/myrepo/objects/batch", json=request_payload) + response = test_client.post( + "/myorg/myrepo/objects/batch", json=request_payload + ) - assert 200 == response.status_code - assert "application/vnd.git-lfs+json" == response.content_type + assert response.status_code == 200 + assert response.content_type == "application/vnd.git-lfs+json" payload = response.json assert "message" not in payload @@ -137,13 +159,19 @@ def test_download_batch_request_one_file_mismatch(test_client, storage_path): """Test batch API with a two object download request where one file 422""" request_payload = batch_request_payload(operation="download") create_file_in_storage( - storage_path, "myorg", "myrepo", 
request_payload["objects"][0]["oid"], size=9 + storage_path, + "myorg", + "myrepo", + request_payload["objects"][0]["oid"], + size=9, ) - response = test_client.post("/myorg/myrepo/objects/batch", json=request_payload) + response = test_client.post( + "/myorg/myrepo/objects/batch", json=request_payload + ) - assert 422 == response.status_code - assert "application/vnd.git-lfs+json" == response.content_type + assert response.status_code == 422 + assert response.content_type == "application/vnd.git-lfs+json" payload = response.json assert "message" in payload @@ -151,18 +179,26 @@ def test_download_batch_request_one_file_mismatch(test_client, storage_path): assert "transfer" not in payload -def test_download_batch_request_two_files_different_errors(test_client, storage_path): +def test_download_batch_request_two_files_different_errors( + test_client, storage_path +): """Test batch API with a two object download request where one file is missing and one is mismatch""" request_payload = batch_request_payload(operation="download") request_payload["objects"].append({"oid": "12345679", "size": 8}) create_file_in_storage( - storage_path, "myorg", "myrepo", request_payload["objects"][0]["oid"], size=9 + storage_path, + "myorg", + "myrepo", + request_payload["objects"][0]["oid"], + size=9, ) - response = test_client.post("/myorg/myrepo/objects/batch", json=request_payload) + response = test_client.post( + "/myorg/myrepo/objects/batch", json=request_payload + ) - assert 422 == response.status_code - assert "application/vnd.git-lfs+json" == response.content_type + assert response.status_code == 422 + assert response.content_type == "application/vnd.git-lfs+json" payload = response.json assert "message" in payload diff --git a/tests/test_error_responses.py b/tests/test_error_responses.py index 8947550..767d96f 100644 --- a/tests/test_error_responses.py +++ b/tests/test_error_responses.py @@ -10,8 +10,8 @@ def test_error_response_422(test_client): 
json=batch_request_payload(delete_keys=["operation"]), ) - assert 422 == response.status_code - assert "application/vnd.git-lfs+json" == response.content_type + assert response.status_code == 422 + assert response.content_type == "application/vnd.git-lfs+json" assert "message" in response.json @@ -19,17 +19,18 @@ def test_error_response_404(test_client): """Test a bad route error""" response = test_client.get("/now/for/something/completely/different") - assert 404 == response.status_code - assert "application/vnd.git-lfs+json" == response.content_type + assert response.status_code == 404 + assert response.content_type == "application/vnd.git-lfs+json" assert "message" in response.json def test_error_response_403(test_client): """Test that we get Forbidden when trying to upload with the default read-only setup""" response = test_client.post( - "/myorg/myrepo/objects/batch", json=batch_request_payload(operation="upload") + "/myorg/myrepo/objects/batch", + json=batch_request_payload(operation="upload"), ) - assert 403 == response.status_code - assert "application/vnd.git-lfs+json" == response.content_type + assert response.status_code == 403 + assert response.content_type == "application/vnd.git-lfs+json" assert "message" in response.json diff --git a/tests/test_middleware.py b/tests/test_middleware.py index b607e56..1a1303e 100644 --- a/tests/test_middleware.py +++ b/tests/test_middleware.py @@ -7,14 +7,16 @@ from .helpers import batch_request_payload -@pytest.fixture() +@pytest.fixture def app(storage_path): """Session fixture to configure the Flask app""" app = init_app( additional_config={ "TESTING": True, "TRANSFER_ADAPTERS": { - "basic": {"options": {"storage_options": {"path": storage_path}}} + "basic": { + "options": {"storage_options": {"path": storage_path}} + } }, "MIDDLEWARE": [ { @@ -35,11 +37,13 @@ def app(storage_path): def test_upload_request_with_x_forwarded_middleware(test_client): """Test the ProxyFix middleware generates correct URLs if X-Forwarded 
headers are set""" request_payload = batch_request_payload(operation="upload") - response = test_client.post("/myorg/myrepo/objects/batch", json=request_payload) + response = test_client.post( + "/myorg/myrepo/objects/batch", json=request_payload + ) - assert 200 == response.status_code + assert response.status_code == 200 href = response.json["objects"][0]["actions"]["upload"]["href"] - assert "http://localhost/myorg/myrepo/objects/storage/12345678" == href + assert href == "http://localhost/myorg/myrepo/objects/storage/12345678" response = test_client.post( "/myorg/myrepo/objects/batch", @@ -52,8 +56,9 @@ def test_upload_request_with_x_forwarded_middleware(test_client): }, ) - assert 200 == response.status_code + assert response.status_code == 200 href = response.json["objects"][0]["actions"]["upload"]["href"] assert ( - "https://mycompany.xyz:1234/lfs/myorg/myrepo/objects/storage/12345678" == href + href + == "https://mycompany.xyz:1234/lfs/myorg/myrepo/objects/storage/12345678" ) diff --git a/tests/test_schema.py b/tests/test_schema.py index ea07249..f9d9ce9 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -28,7 +28,11 @@ def test_batch_request_schema_valid(input): ({}), (batch_request_payload(operation="sneeze")), (batch_request_payload(objects=[])), - (batch_request_payload(objects=[{"oid": 123456, "size": "large of course"}])), + ( + batch_request_payload( + objects=[{"oid": 123456, "size": "large of course"}] + ) + ), (batch_request_payload(objects=[{"oid": "123abc", "size": -12}])), ], ) @@ -52,10 +56,10 @@ def test_object_schema_accepts_x_fields(): "x-disposition": "inline", } parsed = schema.ObjectSchema().load(payload) - assert "foobarbaz" == parsed["extra"]["filename"] - assert 123123123123 == parsed["extra"]["mtime"] - assert "123abc" == parsed["oid"] - assert "inline" == parsed["extra"]["disposition"] + assert parsed["extra"]["filename"] == "foobarbaz" + assert parsed["extra"]["mtime"] == 123123123123 + assert parsed["oid"] == "123abc" 
+ assert parsed["extra"]["disposition"] == "inline" def test_object_schema_rejects_unknown_fields(): diff --git a/tests/transfer/conftest.py b/tests/transfer/conftest.py index 463c77b..5523247 100644 --- a/tests/transfer/conftest.py +++ b/tests/transfer/conftest.py @@ -5,11 +5,11 @@ from giftless import transfer -@pytest.fixture() +@pytest.fixture def reset_registered_transfers(): """Reset global registered transfer adapters for each module""" - adapters = dict(transfer._registered_adapters) # noqa + adapters = dict(transfer._registered_adapters) try: yield finally: - transfer._registered_adapters = adapters # noqa + transfer._registered_adapters = adapters diff --git a/tests/transfer/test_basic_external_adapter.py b/tests/transfer/test_basic_external_adapter.py index 9b8fe9e..3119d8d 100644 --- a/tests/transfer/test_basic_external_adapter.py +++ b/tests/transfer/test_basic_external_adapter.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, Optional, Tuple +from typing import Any, Optional from urllib.parse import urlencode import pytest @@ -12,13 +12,13 @@ def test_factory_returns_object(): base_url = "https://s4.example.com/" lifetime = 300 adapter = basic_external.factory( - "{}:MockExternalStorageBackend".format( - __name__, - ), + f"{__name__}:MockExternalStorageBackend", {"base_url": base_url}, lifetime, ) - assert isinstance(adapter, basic_external.BasicExternalBackendTransferAdapter) + assert isinstance( + adapter, basic_external.BasicExternalBackendTransferAdapter + ) assert getattr(adapter.storage, "base_url", None) == base_url assert adapter.action_lifetime == lifetime @@ -26,9 +26,7 @@ def test_factory_returns_object(): @pytest.mark.usefixtures("app_context") def test_upload_action_new_file(): adapter = basic_external.factory( - "{}:MockExternalStorageBackend".format( - __name__, - ), + f"{__name__}:MockExternalStorageBackend", {}, 900, ) @@ -56,7 +54,7 @@ def test_upload_action_new_file(): @pytest.mark.usefixtures("app_context") def 
test_upload_action_extras_are_passed(): adapter = basic_external.factory( - "{}:MockExternalStorageBackend".format(__name__), {}, 900 + f"{__name__}:MockExternalStorageBackend", {}, 900 ) response = adapter.upload( "myorg", "myrepo", "abcdef123456", 1234, {"filename": "foo.csv"} @@ -187,7 +185,7 @@ class MockExternalStorageBackend(basic_external.ExternalStorage): """ def __init__(self, base_url: str = "https://cloudstorage.example.com/"): - self.existing_objects: Dict[Tuple[str, str], int] = {} + self.existing_objects: dict[tuple[str, str], int] = {} self.base_url = base_url def exists(self, prefix: str, oid: str) -> bool: @@ -205,12 +203,14 @@ def get_upload_action( oid: str, size: int, expires_in: int, - extra: Optional[Dict[str, Any]] = None, - ) -> Dict[str, Any]: + extra: Optional[dict[str, Any]] = None, + ) -> dict[str, Any]: return { "actions": { "upload": { - "href": self._get_signed_url(prefix, oid, expires_in, extra), + "href": self._get_signed_url( + prefix, oid, expires_in, extra + ), "header": {"x-foo-bar": "bazbaz"}, "expires_in": expires_in, } @@ -223,12 +223,14 @@ def get_download_action( oid: str, size: int, expires_in: int, - extra: Optional[Dict[str, Any]] = None, - ) -> Dict[str, Any]: + extra: Optional[dict[str, Any]] = None, + ) -> dict[str, Any]: return { "actions": { "download": { - "href": self._get_signed_url(prefix, oid, expires_in, extra), + "href": self._get_signed_url( + prefix, oid, expires_in, extra + ), "header": {}, "expires_in": 900, } @@ -240,9 +242,9 @@ def _get_signed_url( prefix: str, oid: str, expires_in: int, - extra: Optional[Dict[str, Any]] = None, + extra: Optional[dict[str, Any]] = None, ): - url = "{}{}/{}?expires_in={}".format(self.base_url, prefix, oid, expires_in) + url = f"{self.base_url}{prefix}/{oid}?expires_in={expires_in}" if extra: url = f"{url}&{urlencode(extra, doseq=False)}" return url From 3b90ae646c457ae326151a2224d05aeaad89e961 Mon Sep 17 00:00:00 2001 From: adam Date: Wed, 3 Jan 2024 09:59:21 -0700 
Subject: [PATCH 4/4] Allow Python 3.10-3.12, format for 3.10 --- .pre-commit-config.yaml | 6 ++++++ Makefile | 2 +- giftless/auth/__init__.py | 4 +++- giftless/storage/azure.py | 6 ++++-- pyproject.toml | 6 ++++-- requirements.in | 2 +- requirements.txt | 2 +- 7 files changed, 20 insertions(+), 8 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7dccfc9..dc3a0d3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,6 +18,12 @@ repos: # args: [--fix, --exit-non-zero-on-fix] # - id: ruff-format + # FIXME: replace with ruff, eventually + - repo: https://github.com/psf/black + rev: 23.12.1 + hooks: + - id: black + - repo: https://github.com/adamchainz/blacken-docs rev: 1.16.0 hooks: diff --git a/Makefile b/Makefile index 267fdf8..da0ba28 100644 --- a/Makefile +++ b/Makefile @@ -83,7 +83,7 @@ $(SENTINELS): mkdir $@ $(SENTINELS)/dist-setup: | $(SENTINELS) - $(PIP) install -U pip wheel twine + $(PIP) install -U pip wheel twine pre-commit @touch $@ $(SENTINELS)/dist: $(SENTINELS)/dist-setup $(DIST_DIR)/$(PACKAGE_NAME)-$(VERSION).tar.gz $(DIST_DIR)/$(PACKAGE_NAME)-$(VERSION)-py3-none-any.whl | $(SENTINELS) diff --git a/giftless/auth/__init__.py b/giftless/auth/__init__.py index f808c05..a969f76 100644 --- a/giftless/auth/__init__.py +++ b/giftless/auth/__init__.py @@ -192,7 +192,9 @@ def _create_authenticator(spec: Union[str, dict[str, Any]]) -> Authenticator: return get_callable(spec, __name__) log.debug("Creating authenticator using factory: %s", spec["factory"]) - factory = get_callable(spec["factory"], __name__) # type: Callable[..., Authenticator] + factory = get_callable( + spec["factory"], __name__ + ) # type: Callable[..., Authenticator] options = spec.get("options", {}) return factory(**options) diff --git a/giftless/storage/azure.py b/giftless/storage/azure.py index 57a2025..4979e34 100644 --- a/giftless/storage/azure.py +++ b/giftless/storage/azure.py @@ -3,7 +3,7 @@ import posixpath from collections import 
namedtuple from collections.abc import Iterable -from datetime import UTC, datetime, timedelta +from datetime import datetime, timedelta, timezone from typing import IO, Any, Optional from urllib.parse import urlencode from xml.sax.saxutils import escape as xml_escape @@ -248,7 +248,9 @@ def _get_signed_url( ) -> str: blob_name = self._get_blob_path(prefix, oid) blob_permissions = BlobSasPermissions(**permissions) - token_expires = datetime.now(tz=UTC) + timedelta(seconds=expires_in) + token_expires = datetime.now(tz=timezone.utc) + timedelta( + seconds=expires_in + ) extra_args: dict[str, Any] = {} if filename and disposition: diff --git a/pyproject.toml b/pyproject.toml index ce5e7ea..9933044 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,12 +15,14 @@ classifiers = [ "License :: OSI Approved:: MIT License", "Programming Language :: Python", "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Natural Language :: English", "Operating System :: POSIX", "Typing :: Typed", ] -requires-python = ">=3.11" +requires-python = ">=3.10" dependencies = [] dynamic = ["version"] @@ -53,7 +55,7 @@ build-backend = "setuptools.build_meta" [tool.black] line-length = 79 -target-version = ["py311"] +target-version = ["py310"] exclude = ''' /( \.eggs diff --git a/requirements.in b/requirements.in index d73ed75..486b08a 100644 --- a/requirements.in +++ b/requirements.in @@ -20,4 +20,4 @@ azure-storage-blob~=12.19 google-cloud-storage~=2.14 boto3~=1.34 -importlib-metadata; python_version < '3.11' +importlib-metadata; python_version < '3.13' diff --git a/requirements.txt b/requirements.txt index 0c34de2..6bc2f41 100644 --- a/requirements.txt +++ b/requirements.txt @@ -70,7 +70,7 @@ idna==3.6 # via # anyio # requests -importlib-metadata==7.0.0 ; python_version < "3.11" +importlib-metadata==7.0.0 ; python_version < "3.13" # via -r requirements.in isodate==0.6.1 # 
via azure-storage-blob