From 0cfbaacf7005076dfe7130679a066661c14122c6 Mon Sep 17 00:00:00 2001
From: Matteo De Wint
Date: Mon, 1 Jan 2024 14:35:55 +0100
Subject: [PATCH] feat: add `s3pypi delete` command (#115)

* feat: moved `s3pypi` command to `s3pypi upload`
* feat: add `s3pypi delete` command
* refactor: move locker into S3Storage
* feat: rename `--unsafe-s3-website` option to `--index.html`
* chore: update changelog
* refactor: use explicit `root_index` name
* chore: remove rc1 from version
---
 CHANGELOG.md                      |  21 +++++-
 README.md                         |   9 ++-
 pyproject.toml                    |   2 +-
 s3pypi/__init__.py                |   2 +-
 s3pypi/__main__.py                | 121 ++++++++++++++++++------
 s3pypi/core.py                    |  65 ++++++++--------
 s3pypi/storage.py                 |  46 +++++++++---
 setup.cfg                         |   2 +-
 tests/integration/test_main.py    |  42 +++++++++--
 tests/integration/test_storage.py |  26 +++----
 10 files changed, 223 insertions(+), 113 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8e1bb36..b406c27 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,24 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [PEP 440](https://www.python.org/dev/peps/pep-0440/).
 
+## 2.0.0 - 2024-XX-XX
+
+### Added
+
+- `s3pypi delete` command to delete packages from S3.
+
+### Changed
+
+- Moved default command to `s3pypi upload`.
+- Renamed `--unsafe-s3-website` option to `--index.html`.
+
+### Removed
+
+- `--acl` option. Use `--s3-put-args='ACL=...'` instead. [The use of ACLs is discouraged].
+
+[The use of ACLs is discouraged]: https://docs.aws.amazon.com/AmazonS3/latest/userguide/about-object-ownership.html
+
+
 ## 1.2.1 - 2023-12-31
 
 ### Fixed
@@ -46,8 +64,7 @@
 
 ### Added
 
-- SHA-256 checksums of packages to URLs. [@andrei-shabanski](https://github.com/andrei-
-  shabanski)
+- SHA-256 checksums in URLs. [@andrei-shabanski](https://github.com/andrei-shabanski)
 - `--no-sign-request` option to disable S3 authentication.
   [@jaustinpage](https://github.com/jaustinpage)
 - Expand glob patterns in case they were not expanded by a shell.
diff --git a/README.md b/README.md
index 581ca48..3f28a44 100644
--- a/README.md
+++ b/README.md
@@ -138,6 +138,13 @@ be renamed to `/`. You can do so using the provided script:
 
 ```console
 $ scripts/migrate-s3-index.py example-bucket
 ```
 
+To instead keep using the old configuration with a publicly accessible S3
+website endpoint, pass the following options when uploading packages:
+
+```console
+$ s3pypi upload ... --index.html --s3-put-args='ACL=public-read'
+```
+
 [Origin Access Identity (OAI)]: https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/private-content-restricting-access-to-s3.html
@@ -151,7 +158,7 @@ You can now use `s3pypi` to upload packages to S3:
 
 $ cd /path/to/your-project/
 $ python setup.py sdist bdist_wheel
-$ s3pypi dist/* --bucket example-bucket [--prefix PREFIX] [--acl ACL]
+$ s3pypi upload dist/* --bucket example-bucket [--prefix PREFIX]
 ```
 
 See `s3pypi --help` for a description of all options.
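With this release the CLI becomes subcommand-driven: the old bare `s3pypi dist/* ...` invocation moves to `s3pypi upload`, and `s3pypi delete` is new. A minimal sketch of driving the new entry point from Python, the same way the integration tests further down invoke it; `example-bucket` is an illustrative name, and `main` expands the glob itself (per the changelog entry above about expanding glob patterns):

```python
from s3pypi.__main__ import main

# New-style upload: the subcommand comes first, and --put-root-index
# also writes a root index listing all package names.
main("upload", "dist/*", "--bucket", "example-bucket", "--put-root-index")

# New delete command: positional arguments are package name and version.
main("delete", "hello-world", "0.1.0", "--bucket", "example-bucket")
```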
diff --git a/pyproject.toml b/pyproject.toml
index 8042491..7fb254e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "s3pypi"
-version = "1.2.1"
+version = "2.0.0"
 description = "CLI for creating a Python Package Repository in an S3 bucket"
 authors = [
     "Matteo De Wint ",
diff --git a/s3pypi/__init__.py b/s3pypi/__init__.py
index fd84cc0..dec3ee0 100644
--- a/s3pypi/__init__.py
+++ b/s3pypi/__init__.py
@@ -1,2 +1,2 @@
 __prog__ = "s3pypi"
-__version__ = "1.2.1"
+__version__ = "2.0.0"
diff --git a/s3pypi/__main__.py b/s3pypi/__main__.py
index 75309c9..0293d8b 100644
--- a/s3pypi/__main__.py
+++ b/s3pypi/__main__.py
@@ -2,9 +2,9 @@
 import logging
 import sys
-from argparse import ArgumentParser
+from argparse import ArgumentParser, Namespace
 from pathlib import Path
-from typing import Dict
+from typing import Callable, Dict
 
 from s3pypi import __prog__, __version__, core
@@ -18,39 +18,84 @@ def string_dict(text: str) -> Dict[str, str]:
 
 def build_arg_parser() -> ArgumentParser:
     p = ArgumentParser(prog=__prog__)
-    p.add_argument(
+    p.add_argument("-V", "--version", action="version", version=__version__)
+    p.add_argument("-v", "--verbose", action="store_true", help="Verbose output.")
+
+    commands = p.add_subparsers(help="Commands", required=True)
+
+    def add_command(
+        func: Callable[[core.Config, Namespace], None], help: str
+    ) -> ArgumentParser:
+        name = func.__name__.replace("_", "-")
+        cmd = commands.add_parser(name, help=help)
+        cmd.set_defaults(func=func)
+        return cmd
+
+    up = add_command(upload, help="Upload packages to S3.")
+    up.add_argument(
         "dist",
         nargs="+",
         type=Path,
         help="The distribution files to upload to S3. Usually `dist/*`.",
     )
+    build_s3_args(up)
+    up.add_argument(
+        "--put-root-index",
+        action="store_true",
+        help="Write a root index that lists all available package names.",
+    )
+    g = up.add_mutually_exclusive_group()
+    g.add_argument(
+        "--strict",
+        action="store_true",
+        help="Fail when trying to upload existing files.",
+    )
+    g.add_argument(
+        "-f", "--force", action="store_true", help="Overwrite existing files."
+    )
+
+    d = add_command(delete, help="Delete packages from S3.")
+    d.add_argument("name", help="Package name.")
+    d.add_argument("version", help="Package version.")
+    build_s3_args(d)
+
+    return p
+
+
+def build_s3_args(p: ArgumentParser) -> None:
     p.add_argument("-b", "--bucket", required=True, help="The S3 bucket to upload to.")
+    p.add_argument("--prefix", help="Optional prefix to use for S3 object names.")
+    p.add_argument("--profile", help="Optional AWS profile to use.")
     p.add_argument("--region", help="Optional AWS region to target.")
-    p.add_argument("--prefix", help="Optional prefix to use for S3 object names.")
-    p.add_argument("--acl", help="Optional canned ACL to use for S3 objects.")
-    p.add_argument("--s3-endpoint-url", help="Optional custom S3 endpoint URL.")
+    p.add_argument(
+        "--no-sign-request",
+        action="store_true",
+        help="Don't use authentication when communicating with S3.",
+    )
+    p.add_argument(
+        "--s3-endpoint-url", metavar="URL", help="Optional custom S3 endpoint URL."
+    )
     p.add_argument(
         "--s3-put-args",
+        metavar="ARGS",
         type=string_dict,
         default={},
         help=(
             "Optional extra arguments to S3 PutObject calls. Example: "
-            "'ServerSideEncryption=aws:kms,SSEKMSKeyId=1234...'"
+            "'ACL=public-read,ServerSideEncryption=aws:kms,SSEKMSKeyId=1234...'"
         ),
     )
     p.add_argument(
-        "--unsafe-s3-website",
+        "--index.html",
+        dest="index_html",
         action="store_true",
         help=(
-            "Store the index as an S3 object named `/index.html` instead of `/`. "
-            "This option is provided for backwards compatibility with S3 website endpoints, "
-            "the use of which is discouraged because they require the bucket to be publicly accessible. "
-            "It's recommended to instead use a private S3 bucket with a CloudFront Origin Access Identity."
+            "Store index pages with suffix `/index.html` instead of `/`. "
+            "This provides compatibility with custom HTTPS proxies or S3 website endpoints."
         ),
     )
     p.add_argument(
-        "-l",
         "--lock-indexes",
         action="store_true",
         help=(
@@ -58,30 +103,20 @@ def build_arg_parser() -> ArgumentParser:
             "This ensures that concurrent invocations of s3pypi do not overwrite each other's changes."
         ),
     )
-    p.add_argument(
-        "--put-root-index",
-        action="store_true",
-        help="Write a root index that lists all available package names.",
-    )
-    p.add_argument(
-        "--no-sign-request",
-        action="store_true",
-        help="Don't use authentication when communicating with S3.",
-    )
-    g = p.add_mutually_exclusive_group()
-    g.add_argument(
-        "--strict",
-        action="store_true",
-        help="Fail when trying to upload existing files.",
-    )
-    g.add_argument(
-        "-f", "--force", action="store_true", help="Overwrite existing files."
+
+def upload(cfg: core.Config, args: Namespace) -> None:
+    core.upload_packages(
+        cfg,
+        args.dist,
+        put_root_index=args.put_root_index,
+        strict=args.strict,
+        force=args.force,
     )
-    p.add_argument("-v", "--verbose", action="store_true", help="Verbose output.")
-    p.add_argument("-V", "--version", action="version", version=__version__)
 
-    return p
+
+def delete(cfg: core.Config, args: Namespace) -> None:
+    core.delete_package(cfg, name=args.name, version=args.version)
 
 
 def main(*raw_args: str) -> None:
     log.setLevel(logging.DEBUG if args.verbose else logging.INFO)
 
     cfg = core.Config(
-        dist=args.dist,
         s3=core.S3Config(
             bucket=args.bucket,
             prefix=args.prefix,
+            profile=args.profile,
+            region=args.region,
+            no_sign_request=args.no_sign_request,
             endpoint_url=args.s3_endpoint_url,
             put_kwargs=args.s3_put_args,
-            unsafe_s3_website=args.unsafe_s3_website,
-            no_sign_request=args.no_sign_request,
+            index_html=args.index_html,
+            lock_indexes=args.lock_indexes,
         ),
-        strict=args.strict,
-        force=args.force,
-        lock_indexes=args.lock_indexes,
-        put_root_index=args.put_root_index,
-        profile=args.profile,
-        region=args.region,
     )
-    if args.acl:
-        cfg.s3.put_kwargs["ACL"] = args.acl
 
     try:
-        core.upload_packages(cfg)
+        args.func(cfg, args)
     except core.S3PyPiError as e:
         sys.exit(f"ERROR: {e}")
diff --git a/s3pypi/core.py b/s3pypi/core.py
index 27588bc..78440ed 100644
--- a/s3pypi/core.py
+++ b/s3pypi/core.py
@@ -6,15 +6,12 @@
 from itertools import groupby
 from operator import attrgetter
 from pathlib import Path
-from typing import List, Optional
+from typing import List
 from zipfile import ZipFile
 
-import boto3
-
 from s3pypi import __prog__
 from s3pypi.exceptions import S3PyPiError
 from s3pypi.index import Hash
-from s3pypi.locking import DummyLocker, DynamoDBLocker
 from s3pypi.storage import S3Config, S3Storage
 
 log = logging.getLogger(__prog__)
@@ -24,14 +21,7 @@
 
 @dataclass
 class Config:
-    dist: List[Path]
     s3: S3Config
-    strict: bool = False
-    force: bool = False
-    lock_indexes: bool = False
-    put_root_index: bool = False
-    profile: Optional[str] = None
-    region: Optional[str] = None
 
 
 @dataclass
@@ -45,28 +35,27 @@ def normalize_package_name(name: str) -> str:
     return re.sub(r"[-_.]+", "-", name.lower())
 
 
-def upload_packages(cfg: Config) -> None:
-    session = boto3.Session(profile_name=cfg.profile, region_name=cfg.region)
-    storage = S3Storage(session, cfg.s3)
-    lock = (
-        DynamoDBLocker(session, table=f"{cfg.s3.bucket}-locks")
-        if cfg.lock_indexes
-        else DummyLocker()
-    )
+def upload_packages(
+    cfg: Config,
+    dist: List[Path],
+    put_root_index: bool = False,
+    strict: bool = False,
+    force: bool = False,
+) -> None:
+    storage = S3Storage(cfg.s3)
+    distributions = parse_distributions(dist)
 
-    distributions = parse_distributions(cfg.dist)
     get_name = attrgetter("name")
     existing_files = []
 
     for name, group in groupby(sorted(distributions, key=get_name), get_name):
         directory = normalize_package_name(name)
 
-        with lock(directory):
-            index = storage.get_index(directory)
+        with storage.locked_index(directory) as index:
             for distr in group:
                 filename = distr.local_path.name
 
-                if not cfg.force and filename in index.filenames:
+                if not force and filename in index.filenames:
                     existing_files.append(filename)
                     msg = "%s already exists! (use --force to overwrite)"
                     log.warning(msg, filename)
@@ -75,14 +64,11 @@ def upload_packages(cfg: Config) -> None:
                 storage.put_distribution(directory, distr.local_path)
                 index.filenames[filename] = Hash.of("sha256", distr.local_path)
 
-            storage.put_index(directory, index)
-
-    if cfg.put_root_index:
-        with lock(storage.root):
-            index = storage.build_root_index()
-            storage.put_index(storage.root, index)
+    if put_root_index:
+        with storage.locked_index(storage.root) as root_index:
+            root_index.filenames = dict.fromkeys(storage.list_directories())
 
-    if cfg.strict and existing_files:
+    if strict and existing_files:
         raise S3PyPiError(f"Found {len(existing_files)} existing files on S3")
@@ -133,3 +119,22 @@ def extract_wheel_metadata(path: Path) -> PackageMetadata:
         raise S3PyPiError(f"No wheel metadata found in {path}") from None
 
     return email.message_from_string(text)
+
+
+def delete_package(cfg: Config, name: str, version: str) -> None:
+    storage = S3Storage(cfg.s3)
+    directory = normalize_package_name(name)
+
+    with storage.locked_index(directory) as index:
+        filenames = [f for f in index.filenames if f.split("-", 2)[1] == version]
+        if not filenames:
+            raise S3PyPiError(f"Package not found: {name} {version}")
+
+        for filename in filenames:
+            log.info("Deleting %s", filename)
+            storage.delete(directory, filename)
+            del index.filenames[filename]
+
+    if not index.filenames:
+        with storage.locked_index(storage.root) as root_index:
+            root_index.filenames.pop(directory, None)
diff --git a/s3pypi/storage.py b/s3pypi/storage.py
index 35885c6..2d79718 100644
--- a/s3pypi/storage.py
+++ b/s3pypi/storage.py
@@ -1,6 +1,7 @@
+from contextlib import contextmanager
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Dict, List, Optional
+from typing import Dict, Iterator, List, Optional
 
 import boto3
 import botocore
@@ -8,31 +9,43 @@
 from mypy_boto3_s3.service_resource import Object
 
 from s3pypi.index import Index
+from s3pypi.locking import DummyLocker, DynamoDBLocker
 
 
 @dataclass
 class S3Config:
     bucket: str
     prefix: Optional[str] = None
+    profile: Optional[str] = None
+    region: Optional[str] = None
+    no_sign_request: bool = False
     endpoint_url: Optional[str] = None
     put_kwargs: Dict[str, str] = field(default_factory=dict)
-    unsafe_s3_website: bool = False
-    no_sign_request: bool = False
+    index_html: bool = False
+    lock_indexes: bool = False
 
 
 class S3Storage:
     root = "/"
     _index = "index.html"
 
-    def __init__(self, session: boto3.session.Session, cfg: S3Config):
-        _config = None
+    def __init__(self, cfg: S3Config):
+        session = boto3.Session(profile_name=cfg.profile, region_name=cfg.region)
+
+        config = None
         if cfg.no_sign_request:
-            _config = BotoConfig(signature_version=botocore.session.UNSIGNED)  # type: ignore
+            config = BotoConfig(signature_version=botocore.session.UNSIGNED)  # type: ignore
 
-        self.s3 = session.resource("s3", endpoint_url=cfg.endpoint_url, config=_config)
-        self.index_name = self._index if cfg.unsafe_s3_website else ""
+        self.s3 = session.resource("s3", endpoint_url=cfg.endpoint_url, config=config)
+        self.index_name = self._index if cfg.index_html else ""
         self.cfg = cfg
 
+        self.lock = (
+            DynamoDBLocker(session, table=f"{cfg.bucket}-locks")
+            if cfg.lock_indexes
+            else DummyLocker()
+        )
+
     def _object(self, directory: str, filename: str) -> Object:
         parts = [directory, filename]
         if parts == [self.root, self.index_name]:
@@ -48,10 +61,18 @@ def get_index(self, directory: str) -> Index:
             return Index()
         return Index.parse(html.decode())
 
-    def build_root_index(self) -> Index:
-        return Index(dict.fromkeys(self._list_dirs()))
+    @contextmanager
+    def locked_index(self, directory: str) -> Iterator[Index]:
+        with self.lock(directory):
+            index = self.get_index(directory)
+            yield index
+
+            if index.filenames:
+                self.put_index(directory, index)
+            else:
+                self.delete(directory, self.index_name)
 
-    def _list_dirs(self) -> List[str]:
+    def list_directories(self) -> List[str]:
         prefix = f"{p}/" if (p := self.cfg.prefix) else ""
         return [
             d[len(prefix) :]
@@ -76,3 +97,6 @@ def put_distribution(self, directory: str, local_path: Path) -> None:
             ContentType="application/x-gzip",
             **self.cfg.put_kwargs,  # type: ignore
         )
+
+    def delete(self, directory: str, filename: str) -> None:
+        self._object(directory, filename).delete()
diff --git a/setup.cfg b/setup.cfg
index a0e7560..b31db2c 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.2.1
+current_version = 2.0.0
 commit = True
 message = chore: bump version to {new_version}
diff --git a/tests/integration/test_main.py b/tests/integration/test_main.py
index d6ce03d..153c0aa 100644
--- a/tests/integration/test_main.py
+++ b/tests/integration/test_main.py
@@ -27,7 +27,7 @@ def test_main_upload_package(chdir, data_dir, s3_bucket, dynamodb_table, prefix)
         args.extend(["--prefix", prefix])
 
     with chdir(data_dir):
-        s3pypi(*args)
+        s3pypi("upload", *args)
 
     def read(key: str) -> bytes:
         return s3_bucket.Object(key).get()["Body"].read()
@@ -49,13 +49,13 @@ def test_main_upload_package_exists(chdir, data_dir, s3_bucket, caplog):
     dist = "dists/foo-0.1.0.tar.gz"
 
     with chdir(data_dir):
-        s3pypi(dist, "--bucket", s3_bucket.name)
-        s3pypi(dist, "--bucket", s3_bucket.name)
+        s3pypi("upload", dist, "--bucket", s3_bucket.name)
+        s3pypi("upload", dist, "--bucket", s3_bucket.name)
 
         with pytest.raises(SystemExit, match="ERROR: Found 1 existing files on S3"):
-            s3pypi(dist, "--strict", "--bucket", s3_bucket.name)
+            s3pypi("upload", dist, "--strict", "--bucket", s3_bucket.name)
 
-        s3pypi(dist, "--force", "--bucket", s3_bucket.name)
+        s3pypi("upload", dist, "--force", "--bucket", s3_bucket.name)
 
     msg = "foo-0.1.0.tar.gz already exists! (use --force to overwrite)"
     success = (__prog__, logging.INFO, "Uploading " + dist)
@@ -80,7 +80,7 @@ def test_main_upload_package_invalid(
 ):
     with chdir(data_dir):
         with pytest.raises(SystemExit, match=f"ERROR: {error_msg}"):
-            s3pypi(*dists, "--bucket", s3_bucket.name)
+            s3pypi("upload", *dists, "--bucket", s3_bucket.name)
 
 
 def test_main_upload_package_with_force_updates_hash(chdir, data_dir, s3_bucket):
@@ -98,7 +98,7 @@ def get_index():
 
     with chdir(data_dir):
         dist = "dists/hello_world-0.1.0-py3-none-any.whl"
-        s3pypi(dist, "--force", "--bucket", s3_bucket.name)
+        s3pypi("upload", dist, "--force", "--bucket", s3_bucket.name)
 
     assert get_index().filenames == {
         "hello-world-0.1.0.tar.gz": None,
         "hello_world-0.1.0-py3-none-any.whl": Hash(
             "sha256", "c5a2633aecf5adc5ae49b868e12faf01f2199b914d4296399b52dec62cb70fb3"
         ),
     }
+
+
+def test_main_delete_package(chdir, data_dir, s3_bucket):
+    with chdir(data_dir):
+        s3pypi("upload", "dists/*", "--bucket", s3_bucket.name, "--put-root-index")
+        s3pypi("delete", "hello-world", "0.1.0", "--bucket", s3_bucket.name)
+
+    def read(key: str) -> bytes:
+        return s3_bucket.Object(key).get()["Body"].read()
+
+    root_index = read("index.html").decode()
+
+    def assert_pkg_exists(pkg: str, filename: str):
+        path = f"{pkg}/"
+        assert read(path + filename)
+        assert f">{filename}" in read(path).decode()
+        assert f">{pkg}" in root_index
+
+    for deleted_key in [
+        "hello-world/",
+        "hello-world/hello_world-0.1.0-py3-none-any.whl",
+    ]:
+        with pytest.raises(s3_bucket.meta.client.exceptions.NoSuchKey):
+            s3_bucket.Object(deleted_key).get()
+
+    assert ">hello-world" not in root_index
+    assert_pkg_exists("foo", "foo-0.1.0.tar.gz")
+    assert_pkg_exists("xyz", "xyz-0.1.0.zip")
diff --git a/tests/integration/test_storage.py b/tests/integration/test_storage.py
index 1550f57..daaeb7c 100644
--- a/tests/integration/test_storage.py
+++ b/tests/integration/test_storage.py
@@ -4,12 +4,12 @@
 from s3pypi.storage import S3Config, S3Storage
 
 
-def test_index_storage_roundtrip(boto3_session, s3_bucket):
+def test_index_storage_roundtrip(s3_bucket):
     directory = "foo"
     index = Index({"bar": None})
 
     cfg = S3Config(bucket=s3_bucket.name)
-    s = S3Storage(boto3_session, cfg)
+    s = S3Storage(cfg)
 
     s.put_index(directory, index)
     got = s.get_index(directory)
@@ -27,12 +27,12 @@
         (S3Config(""), "foo", "bar", "foo/bar"),
         (S3Config("", prefix="P"), "/", index, "P/"),
         (S3Config("", prefix="P"), "foo", "bar", "P/foo/bar"),
-        (S3Config("", prefix="P", unsafe_s3_website=True), "/", index, "P/index.html"),
-        (S3Config("", unsafe_s3_website=True), "/", index, "index.html"),
+        (S3Config("", prefix="P", index_html=True), "/", index, "P/index.html"),
+        (S3Config("", index_html=True), "/", index, "index.html"),
     ],
 )
-def test_s3_key(boto3_session, cfg, directory, filename, expected_key):
-    s = S3Storage(boto3_session, cfg)
+def test_s3_key(cfg, directory, filename, expected_key):
+    s = S3Storage(cfg)
 
     if filename is index:
         filename = s.index_name
@@ -41,23 +41,23 @@
     obj = s._object(directory, filename)
     assert obj.key == expected_key
 
 
-def test_list_dirs(boto3_session, s3_bucket):
+def test_list_directories(s3_bucket):
     cfg = S3Config(bucket=s3_bucket.name, prefix="AA")
-    s = S3Storage(boto3_session, cfg)
+    s = S3Storage(cfg)
     s.put_index("one", Index())
     s.put_index("two", Index())
     s.put_index("three", Index())
 
-    assert s._list_dirs() == ["one/", "three/", "two/"]
+    assert s.list_directories() == ["one/", "three/", "two/"]
 
     cfg = S3Config(bucket=s3_bucket.name, prefix="BBBB")
-    s = S3Storage(boto3_session, cfg)
+    s = S3Storage(cfg)
     s.put_index("xxx", Index())
     s.put_index("yyy", Index())
 
-    assert s._list_dirs() == ["xxx/", "yyy/"]
+    assert s.list_directories() == ["xxx/", "yyy/"]
 
     cfg = S3Config(bucket=s3_bucket.name)
-    s = S3Storage(boto3_session, cfg)
+    s = S3Storage(cfg)
 
-    assert s._list_dirs() == ["AA/", "BBBB/"]
+    assert s.list_directories() == ["AA/", "BBBB/"]
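One design note on the storage changes exercised above: `locked_index` replaces the old `lock` + `get_index` + `put_index` choreography with a single context manager, and an index left empty is deleted instead of rewritten, which is how `delete_package` removes a package's page entirely. A self-contained, in-memory sketch of that pattern; the `pages` dict and `nullcontext` are stand-ins for the real S3 objects and `DummyLocker`/`DynamoDBLocker`:

```python
from contextlib import contextmanager, nullcontext
from typing import Dict, Iterator, Optional

# Stand-in for the per-package index objects stored in S3.
pages: Dict[str, Dict[str, Optional[str]]] = {"foo/": {"foo-0.1.0.tar.gz": None}}


@contextmanager
def locked_index(directory: str) -> Iterator[Dict[str, Optional[str]]]:
    with nullcontext():  # the real code locks `directory` here
        index = pages.get(directory, {})
        yield index  # the caller adds or removes filenames
        if index:
            pages[directory] = index  # still non-empty: rewrite the index
        else:
            pages.pop(directory, None)  # emptied: delete the index page


with locked_index("foo/") as index:
    del index["foo-0.1.0.tar.gz"]  # what delete_package does per file

assert "foo/" not in pages  # dropping the last file removed the page itself
```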