From 6a0165a45be7313e9721a7df2166be019f39023d Mon Sep 17 00:00:00 2001 From: Doug Hellmann Date: Sat, 22 Feb 2025 16:14:39 -0500 Subject: [PATCH] support bootstrapping from git checkouts of projects Support git+https URLs in the input requirements list. "Resolve" the version by cloning the repo at the specified reference and generating the metadata to extract the version string. Rely on the git clone instead of downloading a source distribution or allowing an override to specify where the source should come from. Rely on the git clone with preparing source, and ignore any overrides. Force sdists to be generated using the PEP-517 approach. Add e2e tests for cases with and without tags in the URLs. Signed-off-by: Doug Hellmann --- .github/workflows/test.yaml | 4 +- .mergify.yml | 12 +- e2e/test_bootstrap_git_url.sh | 56 +++++++ e2e/test_bootstrap_git_url_tag.sh | 56 +++++++ src/fromager/gitutils.py | 63 ++++++++ src/fromager/sources.py | 237 ++++++++++++++++++++++++++---- 6 files changed, 396 insertions(+), 32 deletions(-) create mode 100755 e2e/test_bootstrap_git_url.sh create mode 100755 e2e/test_bootstrap_git_url_tag.sh create mode 100644 src/fromager/gitutils.py diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 82aca312..f0b855a3 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -85,6 +85,8 @@ jobs: - bootstrap_prerelease - bootstrap_constraints - bootstrap_cache + - bootstrap_git_url + - bootstrap_git_url_tag - build - build_order - build_steps @@ -140,7 +142,7 @@ jobs: - name: Run tests run: ./e2e/test_${{ matrix.test-script }}.sh - + - name: Upload logs for debugging if: ${{ failure() }} uses: actions/upload-artifact@v4 diff --git a/.mergify.yml b/.mergify.yml index b39493f6..664e418f 100644 --- a/.mergify.yml +++ b/.mergify.yml @@ -31,6 +31,8 @@ pull_request_rules: - check-success=e2e (3.11, 1.75, bootstrap_cache, ubuntu-latest) - check-success=e2e (3.11, 1.75, bootstrap_constraints, ubuntu-latest) - check-success=e2e (3.11, 1.75, bootstrap_extras, ubuntu-latest) + - check-success=e2e (3.11, 1.75, bootstrap_git_url, ubuntu-latest) + - check-success=e2e (3.11, 1.75, bootstrap_git_url_tag, ubuntu-latest) - check-success=e2e (3.11, 1.75, bootstrap_prerelease, ubuntu-latest) - check-success=e2e (3.11, 1.75, build, ubuntu-latest) - check-success=e2e (3.11, 1.75, build_order, ubuntu-latest) @@ -39,6 +41,7 @@ pull_request_rules: - check-success=e2e (3.11, 1.75, download_sequence, ubuntu-latest) - check-success=e2e (3.11, 1.75, elfdeps, ubuntu-latest) - check-success=e2e (3.11, 1.75, extra_metadata, ubuntu-latest) + - check-success=e2e (3.11, 1.75, lint_requirements, ubuntu-latest) - check-success=e2e (3.11, 1.75, meson, ubuntu-latest) - check-success=e2e (3.11, 1.75, migrate_graph, ubuntu-latest) - check-success=e2e (3.11, 1.75, optimize_build, ubuntu-latest) @@ -48,7 +51,6 @@ pull_request_rules: - check-success=e2e (3.11, 1.75, prebuilt_wheels_alt_server, ubuntu-latest) - check-success=e2e (3.11, 1.75, report_missing_dependency, ubuntu-latest) - check-success=e2e (3.11, 1.75, rust_vendor, ubuntu-latest) - - check-success=e2e (3.11, 1.75, lint_requirements, ubuntu-latest) - check-success=e2e (3.12, 1.75, bootstrap, macos-latest) - check-success=e2e (3.12, 1.75, bootstrap, ubuntu-latest) - check-success=e2e (3.12, 1.75, bootstrap_build_tags, macos-latest) @@ -59,6 +61,10 @@ pull_request_rules: - check-success=e2e (3.12, 1.75, bootstrap_constraints, ubuntu-latest) - check-success=e2e (3.12, 1.75, bootstrap_extras, macos-latest) - check-success=e2e (3.12, 1.75, bootstrap_extras, ubuntu-latest) + - check-success=e2e (3.12, 1.75, bootstrap_git_url, macos-latest) + - check-success=e2e (3.12, 1.75, bootstrap_git_url, ubuntu-latest) + - check-success=e2e (3.12, 1.75, bootstrap_git_url_tag, macos-latest) + - check-success=e2e (3.12, 1.75, bootstrap_git_url_tag, ubuntu-latest) - check-success=e2e (3.12, 1.75, bootstrap_prerelease, macos-latest) - check-success=e2e (3.12, 1.75, bootstrap_prerelease, ubuntu-latest) - check-success=e2e (3.12, 1.75, build, macos-latest) @@ -75,6 +81,8 @@ pull_request_rules: - check-success=e2e (3.12, 1.75, elfdeps, ubuntu-latest) - check-success=e2e (3.12, 1.75, extra_metadata, macos-latest) - check-success=e2e (3.12, 1.75, extra_metadata, ubuntu-latest) + - check-success=e2e (3.12, 1.75, lint_requirements, macos-latest) + - check-success=e2e (3.12, 1.75, lint_requirements, ubuntu-latest) - check-success=e2e (3.12, 1.75, meson, macos-latest) - check-success=e2e (3.12, 1.75, meson, ubuntu-latest) - check-success=e2e (3.12, 1.75, migrate_graph, macos-latest) @@ -93,8 +101,6 @@ pull_request_rules: - check-success=e2e (3.12, 1.75, report_missing_dependency, ubuntu-latest) - check-success=e2e (3.12, 1.75, rust_vendor, macos-latest) - check-success=e2e (3.12, 1.75, rust_vendor, ubuntu-latest) - - check-success=e2e (3.12, 1.75, lint_requirements, macos-latest) - - check-success=e2e (3.12, 1.75, lint_requirements, ubuntu-latest) - "-draft" # At least 1 reviewer diff --git a/e2e/test_bootstrap_git_url.sh b/e2e/test_bootstrap_git_url.sh new file mode 100755 index 00000000..5eb9a98d --- /dev/null +++ b/e2e/test_bootstrap_git_url.sh @@ -0,0 +1,56 @@ +#!/bin/bash + +# Test bootstrapping from a requirement with a git+https URL witout specifying a +# version tag. + +SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +source "$SCRIPTDIR/common.sh" + +GIT_REPO_URL="https://opendev.org/openstack/stevedore.git" + +fromager \ + --debug \ + --log-file="$OUTDIR/bootstrap.log" \ + --error-log-file="$OUTDIR/fromager-errors.log" \ + --sdists-repo="$OUTDIR/sdists-repo" \ + --wheels-repo="$OUTDIR/wheels-repo" \ + --work-dir="$OUTDIR/work-dir" \ + --settings-dir="$SCRIPTDIR/changelog_settings" \ + bootstrap "stevedore @ git+${GIT_REPO_URL}" + +find "$OUTDIR/wheels-repo/" -name '*.whl' +find "$OUTDIR/sdists-repo/" -name '*.tar.gz' +ls "$OUTDIR"/work-dir/*/build.log || true + +EXPECTED_FILES=" +$OUTDIR/wheels-repo/downloads/setuptools-*.whl +$OUTDIR/wheels-repo/downloads/pbr-*.whl +$OUTDIR/wheels-repo/downloads/stevedore-*.whl + +$OUTDIR/sdists-repo/downloads/setuptools-*.tar.gz +$OUTDIR/sdists-repo/downloads/pbr-*.tar.gz + +$OUTDIR/sdists-repo/builds/stevedore-*.tar.gz +$OUTDIR/sdists-repo/builds/setuptools-*.tar.gz +$OUTDIR/sdists-repo/builds/pbr-*.tar.gz + +$OUTDIR/work-dir/build-order.json +$OUTDIR/work-dir/constraints.txt + +$OUTDIR/bootstrap.log +$OUTDIR/fromager-errors.log + +$OUTDIR/work-dir/pbr-*/build.log +$OUTDIR/work-dir/setuptools-*/build.log +$OUTDIR/work-dir/stevedore-*/build.log +" + +pass=true +for pattern in $EXPECTED_FILES; do + if [ ! -f "${pattern}" ]; then + echo "Did not find $pattern" 1>&2 + pass=false + fi +done + +$pass diff --git a/e2e/test_bootstrap_git_url_tag.sh b/e2e/test_bootstrap_git_url_tag.sh new file mode 100755 index 00000000..e5eff834 --- /dev/null +++ b/e2e/test_bootstrap_git_url_tag.sh @@ -0,0 +1,56 @@ +#!/bin/bash + +# Test bootstrapping from a requirement with a git+https URL and specifying a +# version tag. + +SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +source "$SCRIPTDIR/common.sh" + +GIT_REPO_URL="https://opendev.org/openstack/stevedore.git" + +fromager \ + --debug \ + --log-file="$OUTDIR/bootstrap.log" \ + --error-log-file="$OUTDIR/fromager-errors.log" \ + --sdists-repo="$OUTDIR/sdists-repo" \ + --wheels-repo="$OUTDIR/wheels-repo" \ + --work-dir="$OUTDIR/work-dir" \ + --settings-dir="$SCRIPTDIR/changelog_settings" \ + bootstrap "stevedore @ git+${GIT_REPO_URL}@5.2.0" + +find "$OUTDIR/wheels-repo/" -name '*.whl' +find "$OUTDIR/sdists-repo/" -name '*.tar.gz' +ls "$OUTDIR"/work-dir/*/build.log || true + +EXPECTED_FILES=" +$OUTDIR/wheels-repo/downloads/setuptools-*.whl +$OUTDIR/wheels-repo/downloads/pbr-*.whl +$OUTDIR/wheels-repo/downloads/stevedore-*.whl + +$OUTDIR/sdists-repo/downloads/setuptools-*.tar.gz +$OUTDIR/sdists-repo/downloads/pbr-*.tar.gz + +$OUTDIR/sdists-repo/builds/stevedore-*.tar.gz +$OUTDIR/sdists-repo/builds/setuptools-*.tar.gz +$OUTDIR/sdists-repo/builds/pbr-*.tar.gz + +$OUTDIR/work-dir/build-order.json +$OUTDIR/work-dir/constraints.txt + +$OUTDIR/bootstrap.log +$OUTDIR/fromager-errors.log + +$OUTDIR/work-dir/pbr-*/build.log +$OUTDIR/work-dir/setuptools-*/build.log +$OUTDIR/work-dir/stevedore-*/build.log +" + +pass=true +for pattern in $EXPECTED_FILES; do + if [ ! -f "${pattern}" ]; then + echo "Did not find $pattern" 1>&2 + pass=false + fi +done + +$pass diff --git a/src/fromager/gitutils.py b/src/fromager/gitutils.py new file mode 100644 index 00000000..daa63b68 --- /dev/null +++ b/src/fromager/gitutils.py @@ -0,0 +1,63 @@ +import logging +import pathlib +from urllib.parse import urlparse + +from packaging.requirements import Requirement + +from fromager import context, external_commands + +logger = logging.getLogger(__name__) + + +def git_clone( + *, + ctx: context.WorkContext, + req: Requirement, + output_dir: pathlib.Path, + repo_url: str, + tag: str | None = None, + ref: str | None = None, + submodules: bool | list[str] = False, +) -> pathlib.Path: + """Clone a git repository""" + if tag is not None and ref is not None: + raise ValueError("tag and ref are mutually exclusive") + + # Create a clean URL without any credentials for logging + parsed_url = urlparse(repo_url) + clean_url = parsed_url._replace(netloc=parsed_url.hostname or "").geturl() + logger.info( + "%s: cloning %s, tag %r, ref %r, submodules %r, into %s", + req.name, + clean_url, + tag, + ref, + submodules, + output_dir, + ) + cmd: list[str] = ["git", "clone"] + if tag is not None: + # --branch works with branches and tags, but not with commits + cmd.extend(["--branch", tag, "--depth", "1"]) + if submodules: + if isinstance(submodules, list): + for pathspec in submodules: + cmd.append(f"--recurse-submodules={pathspec}") + else: + # all submodules + cmd.append("--recurse-submodules") + if tag is not None: + cmd.append("--shallow-submodules") + cmd.extend([repo_url, str(output_dir)]) + external_commands.run(cmd, network_isolation=False) + + # --branch only works with names, so we have to checkout the reference we + # actually want if it is not a name + if ref is not None: + external_commands.run( + ["git", "checkout", "--recurse-submodules", "--force", ref], + cwd=str(output_dir), + network_isolation=False, + ) + + return output_dir diff --git a/src/fromager/sources.py b/src/fromager/sources.py index 8ca916bf..0cbfbb0c 100644 --- a/src/fromager/sources.py +++ b/src/fromager/sources.py @@ -7,8 +7,10 @@ import pathlib import shutil import tarfile +import tempfile import typing import zipfile +from email.parser import BytesParser from urllib.parse import urlparse import resolvelib @@ -18,6 +20,7 @@ from . import ( dependencies, external_commands, + gitutils, metrics, overrides, pyproject, @@ -56,6 +59,15 @@ def download_source( version: Version, download_url: str, ) -> pathlib.Path: + if req.url: + logger.info( + "%s: downloaded source to %s by cloning %s, ignoring any plugins", + req.name, + download_url, + req.url, + ) + return pathlib.Path(download_url) + source_path = overrides.find_and_invoke( req.name, "download_source", @@ -83,6 +95,15 @@ def resolve_source( req_type: RequirementType | None = None, ) -> tuple[str, Version]: "Return URL to source and its version." + + if req.url: + # If we have a URL, we should use that source. For now we only support + # git clone URLs of some sort. We are given the directory where the + # cloned repo resides, and return that as the URL for the source code so + # the next step in the process can find it and operate on it. + logger.info("%s: resolving source via URL, ignoring any plugins", req.name) + return resolve_version_from_git_url(ctx=ctx, req=req) + constraint = ctx.constraints.get_constraint(req.name) logger.debug( f"{req.name}: resolving requirement {req} using {sdist_server_url} with constraint {constraint}" @@ -119,6 +140,136 @@ def resolve_source( return str(url), version +def resolve_version_from_git_url( + ctx: context.WorkContext, + req: Requirement, +) -> tuple[str, Version]: + "Return path to the cloned git repository and the package version." + + if not req.url.startswith("git+"): + raise ValueError(f"unable to handle URL scheme in {req.url} from {req}") + + # We start by not knowing where we would put the source because we don't + # know the version. + working_src_dir = "" + version: Version | None = None + + # Clean up the URL so we can parse it + reduced_url = req.url[len("git+") :] + parsed_url = urlparse(reduced_url) + + # Save the URL that we think we will use for cloning. This might change + # later if the path has a tag or branch in it. + url_to_clone = reduced_url + need_to_clone = False + + # If the URL includes an @ with text after it, we use that as the reference + # to clone, but by default we take the default branch. + git_ref: str | None = None + + if "@" not in parsed_url.path: + # If we have no reference, we know we are going to have to clone the + # repository to figure out the version to use. + need_to_clone = True + else: + # If we have a reference, it might be a valid python version string, or + # not. It _must_ be a valid git reference. If it can be parsed as a + # valid python version, we assume the tag points to source that will + # think that is its version, so we allow reusing an existing cloned repo + # if there is one. + new_path, _, git_ref = parsed_url.path.rpartition("@") + url_to_clone = parsed_url._replace(path=new_path).geturl() + try: + version = Version(git_ref) + except ValueError: + logger.info( + "%s: could not parse %r as a version, cloning to get the version", + req.name, + git_ref, + ) + need_to_clone = True + else: + logger.info("%s: URL %s includes version %s", req.name, req.url, version) + working_src_dir = ( + ctx.work_dir / f"{req.name}-{version}" / f"{req.name}-{version}" + ) + if not working_src_dir.exists(): + need_to_clone = True + else: + if ctx.cleanup: + logger.debug("%s: cleaning up %s", req.name, working_src_dir) + shutil.rmtree(working_src_dir) + need_to_clone = True + else: + logger.info("%s: reusing %s", req.name, working_src_dir) + + if need_to_clone: + with tempfile.TemporaryDirectory() as tmpdir: + clone_dir = pathlib.Path(tmpdir) / "src" + gitutils.git_clone( + ctx=ctx, + req=req, + output_dir=clone_dir, + repo_url=url_to_clone, + ref=git_ref, + ) + if not version: + # If we still do not have a version, get it from the package + # metadata. + version = _get_version_from_package_metadata(ctx, req, clone_dir) + logger.info("%s: found version %s", req.name, version) + working_src_dir = ( + ctx.work_dir / f"{req.name}-{version}" / f"{req.name}-{version}" + ) + if working_src_dir.exists(): + # We have to check if the destination directory exists + # because if we were not given a version we did not clean it + # up earlier. We do not use ctx.cleanup to control this + # action because we cannot trust that the destination + # directory is reusable because we have had to compute the + # version and we cannot be sure that the version is dynamic + # Two different commits in the repo could have the same + # version if that version is set with static data in the + # repo instead of via a tag or dynamically computed by + # something like setuptools-scm. + logger.debug("%s: cleaning up %s", req.name, working_src_dir) + shutil.rmtree(working_src_dir) + need_to_clone = True + working_src_dir.parent.mkdir(parents=True, exist_ok=True) + logger.info("%s: moving cloned repo to %s", req.name, working_src_dir) + shutil.move(clone_dir, working_src_dir) + + # We must know the version and we must have a source directory. + assert version + assert working_src_dir + assert working_src_dir.exists() + return (working_src_dir, version) + + +def _get_version_from_package_metadata( + ctx: context.WorkContext, + req: Requirement, + source_dir: str, +) -> Version: + logger.info(f"{req.name}: generating metadata to get version") + pyproject_toml = dependencies.get_pyproject_contents(source_dir) + hook_caller = dependencies.get_build_backend_hook_caller( + source_dir, + pyproject_toml, + {}, + network_isolation=ctx.network_isolation, + ) + metadata_dir_base = hook_caller.prepare_metadata_for_build_wheel( + metadata_directory=source_dir.parent, + config_settings={}, + ) + metadata_filename = source_dir.parent / metadata_dir_base / "METADATA" + with open(metadata_filename, "rb") as f: + p = BytesParser() + metadata = p.parse(f, headersonly=True) + return Version(metadata["Version"]) + + def default_resolve_source( ctx: context.WorkContext, req: Requirement, @@ -342,24 +493,38 @@ def prepare_source( source_filename: pathlib.Path, version: Version, ) -> pathlib.Path: - logger.info(f"{req.name}: preparing source for {req} from {source_filename}") - prepare_source_details = overrides.find_and_invoke( - req.name, - "prepare_source", - default_prepare_source, - ctx=ctx, - req=req, - source_filename=source_filename, - version=version, - ) - if not isinstance(prepare_source_details, tuple): - source_root_dir = prepare_source_details - elif len(prepare_source_details) == 2: - source_root_dir, _ = prepare_source_details + if req.url: + logger.info( + "%s: preparing source cloned from %s, ignoring any plugins", + req.name, + req.url, + ) + source_root_dir = pathlib.Path(source_filename) + prepare_new_source( + ctx=ctx, + req=req, + source_root_dir=source_root_dir, + version=version, + ) else: - raise ValueError( - f"do not know how to unpack {prepare_source_details}, expected 1 or 2 members" + logger.info(f"{req.name}: preparing source for {req} from {source_filename}") + prepare_source_details = overrides.find_and_invoke( + req.name, + "prepare_source", + default_prepare_source, + ctx=ctx, + req=req, + source_filename=source_filename, + version=version, ) + if not isinstance(prepare_source_details, tuple): + source_root_dir = prepare_source_details + elif len(prepare_source_details) == 2: + source_root_dir, _ = prepare_source_details + else: + raise ValueError( + f"do not know how to unpack {prepare_source_details}, expected 1 or 2 members" + ) write_build_meta(source_root_dir.parent, req, source_filename, version) if source_root_dir is not None: logger.info(f"{req.name}: prepared source for {req} at {source_root_dir}") @@ -430,18 +595,34 @@ def build_sdist( logger.info(f"{req.name}: building source distribution in {build_dir}") extra_environ = pbi.get_extra_environ(build_env=build_env) - sdist_filename = overrides.find_and_invoke( - req.name, - "build_sdist", - default_build_sdist, - ctx=ctx, - extra_environ=extra_environ, - req=req, - version=version, - sdist_root_dir=sdist_root_dir, - build_dir=build_dir, - build_env=build_env, - ) + if req.url: + # The default approach to making an sdist is to make a tarball from the + # source directory, since most of the time we got the source directory + # by unpacking an existing sdist. When we know we cloned a git repo to + # get the source tree, we can be very sure that creating a tarball will + # NOT produce a valid sdist, so we can use the PEP-517 approach + # instead. + logger.info("%s: using PEP-517 sdist build, ignoring any plugins", req.name) + sdist_filename = pep517_build_sdist( + ctx=ctx, + extra_environ=extra_environ, + req=req, + sdist_root_dir=sdist_root_dir, + version=version, + ) + else: + sdist_filename = overrides.find_and_invoke( + req.name, + "build_sdist", + default_build_sdist, + ctx=ctx, + extra_environ=extra_environ, + req=req, + version=version, + sdist_root_dir=sdist_root_dir, + build_dir=build_dir, + build_env=build_env, + ) logger.info(f"{req.name}: built source distribution {sdist_filename}") return sdist_filename