From 1c12ca4731dd74805278a3d94c29c0403f642c67 Mon Sep 17 00:00:00 2001
From: Dustin Ingram
Date: Fri, 8 Mar 2024 11:15:37 +0000
Subject: [PATCH] Normalize filenames

---
 conveyor/views.py   | 57 ++++++++++++++++++++++++++++++++++++++++++---
 tests/test_views.py | 38 ++++++++++++++++++++++++++++++
 2 files changed, 92 insertions(+), 3 deletions(-)
 create mode 100644 tests/test_views.py

diff --git a/conveyor/views.py b/conveyor/views.py
index accc890..9c03865 100644
--- a/conveyor/views.py
+++ b/conveyor/views.py
@@ -18,6 +18,8 @@
 
 from aiohttp import web
 from botocore.config import Config as BotoCoreConfig
+from packaging.utils import parse_sdist_filename, parse_wheel_filename
+from packaging.utils import canonicalize_name, canonicalize_version
 
 ANON_CONFIG = BotoCoreConfig(signature_version=botocore.UNSIGNED)
 
@@ -30,6 +32,48 @@ async def not_found(request):
     return web.Response(status=404)
 
 
+async def _normalize_filename(filename):
+    """Return ``filename`` with its project name and version canonicalized.
+
+    Wheels (``.whl``) and sdists (``.tar.gz``, ``.zip``) are parsed with
+    ``packaging``; a wheel keeps its build tag and its tag segment. Any other
+    filename, or one that ``packaging`` rejects, is returned unchanged so the
+    caller can still fall back to an exact filename match. Although declared
+    ``async``, this performs no I/O; callers must still ``await`` it.
+    """
+    if filename.endswith(".whl"):
+        ext = ".whl"
+    elif filename.endswith(".tar.gz"):
+        ext = ".tar.gz"
+    elif filename.endswith(".zip"):
+        ext = ".zip"
+    else:
+        return filename
+
+    try:
+        if ext == ".whl":
+            name, ver, build, _ = parse_wheel_filename(filename)
+        else:
+            name, ver = parse_sdist_filename(filename)
+            build = ()
+    except ValueError:
+        # InvalidWheelFilename and InvalidSdistFilename both derive from
+        # ValueError. One oddly named file must not abort the lookup for
+        # every other file of the project.
+        return filename
+
+    parts = [canonicalize_name(name), canonicalize_version(ver)]
+    if build:
+        parts.append("".join(str(x) for x in build))
+    if ext == ".whl":
+        # Reuse the tag segment as written (lowercased, like parsed tags):
+        # joining the parsed tag *set* would expand compressed tags such as
+        # ``py2.py3`` into several tags in arbitrary order.
+        stem = filename[: -len(ext)]
+        parts.append("-".join(stem.rsplit("-", 3)[1:]).lower())
+    return "-".join(parts) + ext
+
+
 async def redirect(request):
     python_version = request.match_info["python_version"]
     project_l = request.match_info["project_l"]
@@ -38,7 +82,9 @@ async def redirect(request):
 
     # If the letter bucket doesn't match the first letter of the project, then
     # there is no point to going any further since it will be a 404 regardless.
-    if project_l != project_name[0]:
+    # Allow specifying the exact first character of the actual filename (which
+    # might not be lowercase), to maintain backwards compatibility
+    if project_l != project_name[0].lower() and project_l != project_name[0]:
         return web.Response(status=404, headers={"Reason": "Incorrect project bucket"})
 
     # If the filename we're looking for is a signature, then we'll need to turn
@@ -72,8 +118,13 @@ async def redirect(request):
     # 302 redirect to that URL.
     for release in data.get("releases", {}).values():
         for file_ in release:
-            if (file_["filename"] == filename
-                    and file_["python_version"] == python_version):
+            if (
+                # Prefer that the normalized filename has been specified
+                await _normalize_filename(file_["filename"]) == filename
+                # But also allow specifying the exact filename, to maintain
+                # backwards compatibility
+                or file_["filename"] == filename
+            ) and file_["python_version"] == python_version:
                 # If we've found our filename, but we were actually looking for
                 # the *signature* of that file, then we need to check if it has
                 # a signature associated with it, and if so redirect to that,
diff --git a/tests/test_views.py b/tests/test_views.py
new file mode 100644
index 0000000..f055ba7
--- /dev/null
+++ b/tests/test_views.py
@@ -0,0 +1,38 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+
+from conveyor.views import _normalize_filename
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "filename, expected",
+    [
+        ("Flask-Common-0.2.0.tar.gz", "flask-common-0.2.tar.gz"),  # Lowercased
+        ("websocket_client-0.52.0.tar.gz", "websocket-client-0.52.tar.gz"),
+        ("Sphinx-7.1.1.tar.gz", "sphinx-7.1.1.tar.gz"),  # No trailing zeros
+        ("Foo_Bar-24.0.0.0.tar.gz", "foo-bar-24.tar.gz"),  # Zeros dropped
+        ("Foo_Bar-24.0.0.0-py3-none-any.whl", "foo-bar-24-py3-none-any.whl"),
+        ("foo-24-py3-none-any.whl", "foo-24-py3-none-any.whl"),  # Unchanged
+        (
+            "spam-1.0-420yolo-py3-none-any.whl",
+            "spam-1-420yolo-py3-none-any.whl",
+        ),  # Build tag is kept between version and tags
+        ("Foo_bar-24.0.0.0.zip", "foo-bar-24.zip"),  # .zip sdist
+    ],
+)
+async def test_normalize_filename(filename, expected):
+    result = await _normalize_filename(filename)
+
+    assert result == expected