diff --git a/.github/workflows/docs-ci.yml b/.github/workflows/docs-ci.yml new file mode 100644 index 0000000..18a44aa --- /dev/null +++ b/.github/workflows/docs-ci.yml @@ -0,0 +1,37 @@ +name: CI Documentation + +on: [push, pull_request] + +jobs: + build: + runs-on: ubuntu-20.04 + + strategy: + max-parallel: 4 + matrix: + python-version: [3.9] + + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Give permission to run scripts + run: chmod +x ./docs/scripts/doc8_style_check.sh + + - name: Install Dependencies + run: pip install -e .[docs] + + - name: Check Sphinx Documentation build minimally + working-directory: ./docs + run: sphinx-build -E -W source build + + - name: Check for documentation style errors + working-directory: ./docs + run: ./scripts/doc8_style_check.sh + + diff --git a/.github/workflows/pypi-release.yml b/.github/workflows/pypi-release.yml new file mode 100644 index 0000000..3a4fe27 --- /dev/null +++ b/.github/workflows/pypi-release.yml @@ -0,0 +1,27 @@ +name: Release library as a PyPI wheel and sdist on GH release creation + +on: + release: + types: [created] + +jobs: + build-and-publish-to-pypi: + name: Build and publish library to PyPI + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@master + - name: Set up Python + uses: actions/setup-python@v1 + with: + python-version: 3.9 + - name: Install pypa/build + run: python -m pip install build --user + - name: Build a binary wheel and a source tarball + run: python -m build --sdist --wheel --outdir dist/ + . 
+ - name: Publish distribution to PyPI + if: startsWith(github.ref, 'refs/tags') + uses: pypa/gh-action-pypi-publish@master + with: + password: ${{ secrets.PYPI_API_TOKEN }} + diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index b022fc4..0000000 --- a/.travis.yml +++ /dev/null @@ -1,22 +0,0 @@ -# This is a skeleton Travis CI config file that provides a starting point for adding CI -# to a Python project. Since we primarily develop in python3, this skeleton config file -# will be specific to that language. -# -# See https://config.travis-ci.com/ for a full list of configuration options. - -os: linux - -dist: xenial - -language: python -python: - - "3.6" - - "3.7" - - "3.8" - - "3.9" - -# Scripts to run at install stage -install: ./configure --dev - -# Scripts to run at script stage -script: venv/bin/pytest --ignore=tests/test_vmimage.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index e441787..106598c 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,7 +1,13 @@ Changelog ========= -v30.0.0 (next) +v31.0.0 +-------- + +- Do not install patch.py by default. Instead, it is now installed through the ``patch`` extra. + + +v30.0.0 -------- - Update to the latest skeleton. The virtualenv is now created under the venv diff --git a/CODE_OF_CONDUCT.rst b/CODE_OF_CONDUCT.rst new file mode 100644 index 0000000..590ba19 --- /dev/null +++ b/CODE_OF_CONDUCT.rst @@ -0,0 +1,86 @@ +Contributor Covenant Code of Conduct +==================================== + +Our Pledge +---------- + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to making participation in our +project and our community a harassment-free experience for everyone, +regardless of age, body size, disability, ethnicity, gender identity and +expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, religion, or sexual identity and +orientation. 
+ +Our Standards +------------- + +Examples of behavior that contributes to creating a positive environment +include: + +- Using welcoming and inclusive language +- Being respectful of differing viewpoints and experiences +- Gracefully accepting constructive criticism +- Focusing on what is best for the community +- Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +- The use of sexualized language or imagery and unwelcome sexual + attention or advances +- Trolling, insulting/derogatory comments, and personal or political + attacks +- Public or private harassment +- Publishing others’ private information, such as a physical or + electronic address, without explicit permission +- Other conduct which could reasonably be considered inappropriate in a + professional setting + +Our Responsibilities +-------------------- + +Project maintainers are responsible for clarifying the standards of +acceptable behavior and are expected to take appropriate and fair +corrective action in response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, +or reject comments, commits, code, wiki edits, issues, and other +contributions that are not aligned to this Code of Conduct, or to ban +temporarily or permanently any contributor for other behaviors that they +deem inappropriate, threatening, offensive, or harmful. + +Scope +----- + +This Code of Conduct applies both within project spaces and in public +spaces when an individual is representing the project or its community. +Examples of representing a project or community include using an +official project e-mail address, posting via an official social media +account, or acting as an appointed representative at an online or +offline event. Representation of a project may be further defined and +clarified by project maintainers. 
+ +Enforcement +----------- + +Instances of abusive, harassing, or otherwise unacceptable behavior may +be reported by contacting the project team at pombredanne@gmail.com +or on the Gitter chat channel at https://gitter.im/aboutcode-org/discuss . +All complaints will be reviewed and investigated and will result in a +response that is deemed necessary and appropriate to the circumstances. +The project team is obligated to maintain confidentiality with regard to +the reporter of an incident. Further details of specific enforcement +policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in +good faith may face temporary or permanent repercussions as determined +by other members of the project’s leadership. + +Attribution +----------- + +This Code of Conduct is adapted from the `Contributor Covenant`_ , +version 1.4, available at +https://www.contributor-covenant.org/version/1/4/code-of-conduct.html + +.. _Contributor Covenant: https://www.contributor-covenant.org diff --git a/MANIFEST.in b/MANIFEST.in index ef3721e..c17471b 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -9,7 +9,9 @@ include *.rst include setup.* include configure* include requirements* -include .git* +include .giti* + +include extractcode* global-exclude *.py[co] __pycache__ *.*~ diff --git a/README.rst b/README.rst index 94e6141..24063b8 100644 --- a/README.rst +++ b/README.rst @@ -1,5 +1,6 @@ +============ ExtractCode -=========== +============ - license: Apache-2.0 - copyright: copyright (c) nexB. Inc. and others @@ -21,7 +22,7 @@ Why another extractor? **it will extract!** -ExtractCode will extract things where other extractors may fail. +ExtractCode will extract things where other archive and compressed file extractors may fail. ExtractCode supports one of largest number of archive formats listed in the long `List of supported archive formats`_ found at the bottom of this document. 
diff --git a/appveyor.yml b/appveyor.yml new file mode 100644 index 0000000..6c9d089 --- /dev/null +++ b/appveyor.yml @@ -0,0 +1,19 @@ + +################################################################################ +# We use Appveyor to run minimal smoke tests suites on Pythons 3.x +# on Windows 64 bits +################################################################################ +environment: + matrix: + - PYTHON: "C:\\Python36-x64" +# - PYTHON: "C:\\Python37-x64" +# - PYTHON: "C:\\Python38-x64" +# - PYTHON: "C:\\Python39-x64" + + +build: off + + +test_script: + - python -c "import sys;print(sys.getdefaultencoding())" + - cmd: "set PYTHON_EXECUTABLE=%PYTHON%\\python.exe && configure --dev && venv\\Scripts\\pytest -vvs tests" diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 8e971d7..c0b2c58 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -7,19 +7,11 @@ jobs: - - template: etc/ci/azure-posix.yml - parameters: - job_name: ubuntu16_cpython - image_name: ubuntu-16.04 - python_versions: ['3.6', '3.7', '3.8', '3.9'] - test_suites: - all: sudo chmod 0644 /boot/vmlinuz-* && sudo apt install libguestfs-tools && tmp/bin/pytest -vvs - - template: etc/ci/azure-posix.yml parameters: job_name: ubuntu18_cpython image_name: ubuntu-18.04 - python_versions: ['3.6', '3.7', '3.8', '3.9'] + python_versions: ['3.6', '3.7', '3.8', '3.9', '3.10'] test_suites: all: sudo chmod 0644 /boot/vmlinuz-* && sudo apt install libguestfs-tools && venv/bin/pytest -n 2 -vvs @@ -27,38 +19,38 @@ jobs: parameters: job_name: ubuntu20_cpython image_name: ubuntu-20.04 - python_versions: ['3.6', '3.7', '3.8', '3.9'] + python_versions: ['3.6', '3.7', '3.8', '3.9', '3.10'] test_suites: all: sudo chmod 0644 /boot/vmlinuz-* && sudo apt install libguestfs-tools && venv/bin/pytest -n 2 -vvs - template: etc/ci/azure-posix.yml parameters: - job_name: macos1014_cpython - image_name: macos-10.14 - python_versions: ['3.6', '3.7', '3.8', '3.9'] + job_name: macos1015_cpython + image_name: 
macos-10.15 + python_versions: ['3.6', '3.7', '3.8', '3.9', '3.10'] test_suites: all: venv/bin/pytest -n 2 -vvs - template: etc/ci/azure-posix.yml parameters: - job_name: macos1015_cpython - image_name: macos-10.15 - python_versions: ['3.6', '3.7', '3.8', '3.9'] + job_name: macos11_cpython + image_name: macos-11 + python_versions: ['3.7', '3.8', '3.9', '3.10'] test_suites: all: venv/bin/pytest -n 2 -vvs - template: etc/ci/azure-win.yml parameters: - job_name: win2016_cpython - image_name: vs2017-win2016 - python_versions: ['3.6', '3.7', '3.8', '3.9'] + job_name: win2019_cpython + image_name: windows-2019 + python_versions: ['3.6', '3.7', '3.8', '3.9', '3.10'] test_suites: all: venv\Scripts\pytest -n 2 -vvs - template: etc/ci/azure-win.yml parameters: - job_name: win2019_cpython - image_name: windows-2019 - python_versions: ['3.6', '3.7', '3.8', '3.9'] + job_name: win2022_cpython + image_name: windows-2022 + python_versions: ['3.7', '3.8', '3.9', '3.10'] test_suites: all: venv\Scripts\pytest -n 2 -vvs diff --git a/configure b/configure index bb9ae2e..397f0e4 100755 --- a/configure +++ b/configure @@ -11,11 +11,13 @@ set -e #set -x ################################ -# A configuration script to set things up: +# A configuration script to set things up: # create a virtualenv and install or update thirdparty packages. # Source this script for initial configuration # Use configure --help for details # +# NOTE: please keep in sync with Windows script configure.bat +# # This script will search for a virtualenv.pyz app in etc/thirdparty/virtualenv.pyz # Otherwise it will download the latest from the VIRTUALENV_PYZ_URL default ################################ @@ -27,15 +29,14 @@ CLI_ARGS=$1 # Requirement arguments passed to pip and used by default or with --dev. 
REQUIREMENTS="--editable .[full] --constraint requirements.txt" -DEV_REQUIREMENTS="--editable .[full,testing] --constraint requirements.txt --constraint requirements-dev.txt" +DEV_REQUIREMENTS="--editable .[full,testing,patch] --constraint requirements.txt --constraint requirements-dev.txt" +DOCS_REQUIREMENTS="--editable .[docs] --constraint requirements.txt" # where we create a virtualenv VIRTUALENV_DIR=venv -# Cleanable files and directories with the --clean option -CLEANABLE=" - build - venv" +# Cleanable files and directories to delete with the --clean option +CLEANABLE="build venv" # extra arguments passed to pip PIP_EXTRA_ARGS=" " @@ -50,71 +51,39 @@ VIRTUALENV_PYZ_URL=https://bootstrap.pypa.io/virtualenv.pyz CFG_ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" CFG_BIN_DIR=$CFG_ROOT_DIR/$VIRTUALENV_DIR/bin + +################################ +# Thirdparty package locations and index handling # Find packages from the local thirdparty directory or from thirdparty.aboutcode.org -if [ -f "$CFG_ROOT_DIR/thirdparty" ]; then - PIP_EXTRA_ARGS="--find-links $CFG_ROOT_DIR/thirdparty " +if [ -d "$CFG_ROOT_DIR/thirdparty" ]; then + PIP_EXTRA_ARGS="--find-links $CFG_ROOT_DIR/thirdparty" fi -PIP_EXTRA_ARGS="$PIP_EXTRA_ARGS --find-links https://thirdparty.aboutcode.org/pypi" - +PIP_EXTRA_ARGS="$PIP_EXTRA_ARGS --find-links https://thirdparty.aboutcode.org/pypi/simple/links.html" ################################ -# Set the quiet flag to empty if not defined +# Set the quiet flag to empty if not defined if [[ "$CFG_QUIET" == "" ]]; then CFG_QUIET=" " fi ################################ -# find a proper Python to run +# Find a proper Python to run # Use environment variables or a file if available. # Otherwise the latest Python by default. 
-if [[ "$PYTHON_EXECUTABLE" == "" ]]; then - # check for a file named PYTHON_EXECUTABLE - if [ -f "$CFG_ROOT_DIR/PYTHON_EXECUTABLE" ]; then - PYTHON_EXECUTABLE=$(cat "$CFG_ROOT_DIR/PYTHON_EXECUTABLE") - else - PYTHON_EXECUTABLE=python3 +find_python() { + if [[ "$PYTHON_EXECUTABLE" == "" ]]; then + # check for a file named PYTHON_EXECUTABLE + if [ -f "$CFG_ROOT_DIR/PYTHON_EXECUTABLE" ]; then + PYTHON_EXECUTABLE=$(cat "$CFG_ROOT_DIR/PYTHON_EXECUTABLE") + else + PYTHON_EXECUTABLE=python3 + fi fi -fi - - -################################ -cli_help() { - echo An initial configuration script - echo " usage: ./configure [options]" - echo - echo The default is to configure for regular use. Use --dev for development. - echo Use the --init option if starting a new project and the project - echo dependencies are not available on thirdparty.aboutcode.org/pypi/ - echo and requirements.txt and/or requirements-dev.txt has not been generated. - echo - echo The options are: - echo " --clean: clean built and installed files and exit." - echo " --dev: configure the environment for development." - echo " --init: pull dependencies from PyPI. Used when first setting up a project." - echo " --help: display this help message and exit." - echo - echo By default, the python interpreter version found in the path is used. - echo Alternatively, the PYTHON_EXECUTABLE environment variable can be set to - echo configure another Python executable interpreter to use. If this is not - echo set, a file named PYTHON_EXECUTABLE containing a single line with the - echo path of the Python executable to use will be checked last. - set +e - exit -} - - -clean() { - # Remove cleanable file and directories and files from the root dir. - echo "* Cleaning ..." 
- for cln in $CLEANABLE; - do rm -rf "${CFG_ROOT_DIR:?}/${cln:?}"; - done - set +e - exit } +################################ create_virtualenv() { # create a virtualenv for Python # Note: we do not use the bundled Python 3 "venv" because its behavior and @@ -145,6 +114,7 @@ create_virtualenv() { } +################################ install_packages() { # install requirements in virtualenv # note: --no-build-isolation means that pip/wheel/setuptools will not @@ -161,11 +131,43 @@ install_packages() { } +################################ +cli_help() { + echo An initial configuration script + echo " usage: ./configure [options]" + echo + echo The default is to configure for regular use. Use --dev for development. + echo + echo The options are: + echo " --clean: clean built and installed files and exit." + echo " --dev: configure the environment for development." + echo " --help: display this help message and exit." + echo + echo By default, the python interpreter version found in the path is used. + echo Alternatively, the PYTHON_EXECUTABLE environment variable can be set to + echo configure another Python executable interpreter to use. If this is not + echo set, a file named PYTHON_EXECUTABLE containing a single line with the + echo path of the Python executable to use will be checked last. + set +e + exit +} + + +################################ +clean() { + # Remove cleanable files and directories from the root dir. + echo "* Cleaning ..." 
+ for cln in $CLEANABLE; + do rm -rf "${CFG_ROOT_DIR:?}/${cln:?}"; + done + set +e + exit +} + + ################################ # Main command line entry point -CFG_DEV_MODE=0 CFG_REQUIREMENTS=$REQUIREMENTS -NO_INDEX="--no-index" # We are using getopts to parse option arguments that start with "-" while getopts :-: optchar; do @@ -173,17 +175,19 @@ while getopts :-: optchar; do -) case "${OPTARG}" in help ) cli_help;; - clean ) clean;; - dev ) CFG_REQUIREMENTS="$DEV_REQUIREMENTS" && CFG_DEV_MODE=1;; - init ) NO_INDEX="";; + clean ) find_python && clean;; + dev ) CFG_REQUIREMENTS="$DEV_REQUIREMENTS";; + docs ) CFG_REQUIREMENTS="$DOCS_REQUIREMENTS";; esac;; -esac + esac done -PIP_EXTRA_ARGS="$PIP_EXTRA_ARGS $NO_INDEX" +PIP_EXTRA_ARGS="$PIP_EXTRA_ARGS" +find_python create_virtualenv "$VIRTUALENV_DIR" install_packages "$CFG_REQUIREMENTS" . "$CFG_BIN_DIR/activate" + set +e diff --git a/configure.bat b/configure.bat index d0e8052..4174d09 100644 --- a/configure.bat +++ b/configure.bat @@ -14,6 +14,8 @@ @rem # Source this script for initial configuration @rem # Use configure --help for details +@rem # NOTE: please keep in sync with POSIX script configure + @rem # This script will search for a virtualenv.pyz app in etc\thirdparty\virtualenv.pyz @rem # Otherwise it will download the latest from the VIRTUALENV_PYZ_URL default @rem ################################ @@ -25,7 +27,8 @@ @rem # Requirement arguments passed to pip and used by default or with --dev. 
set "REQUIREMENTS=--editable .[full] --constraint requirements.txt" -set "DEV_REQUIREMENTS=--editable .[full,testing] --constraint requirements.txt --constraint requirements-dev.txt" +set "DEV_REQUIREMENTS=--editable .[full,testing,patch] --constraint requirements.txt --constraint requirements-dev.txt" +set "DOCS_REQUIREMENTS=--editable .[docs] --constraint requirements.txt" @rem # where we create a virtualenv set "VIRTUALENV_DIR=venv" @@ -49,12 +52,11 @@ set "CFG_BIN_DIR=%CFG_ROOT_DIR%\%VIRTUALENV_DIR%\Scripts" @rem ################################ @rem # Thirdparty package locations and index handling -if exist ""%CFG_ROOT_DIR%\thirdparty"" ( - set "PIP_EXTRA_ARGS=--find-links %CFG_ROOT_DIR%\thirdparty " +@rem # Find packages from the local thirdparty directory or from thirdparty.aboutcode.org +if exist "%CFG_ROOT_DIR%\thirdparty" ( + set PIP_EXTRA_ARGS=--find-links "%CFG_ROOT_DIR%\thirdparty" ) - -set "PIP_EXTRA_ARGS=%PIP_EXTRA_ARGS% --find-links https://thirdparty.aboutcode.org/pypi" & %INDEX_ARG% -@rem ################################ +set "PIP_EXTRA_ARGS=%PIP_EXTRA_ARGS% --find-links https://thirdparty.aboutcode.org/pypi/simple/links.html" @rem ################################ @@ -66,53 +68,53 @@ if not defined CFG_QUIET ( @rem ################################ @rem # Main command line entry point -set CFG_DEV_MODE=0 set "CFG_REQUIREMENTS=%REQUIREMENTS%" set "NO_INDEX=--no-index" :again if not "%1" == "" ( -if "%1" EQU "--help" (goto cli_help) -if "%1" EQU "--clean" (goto clean) -if "%1" EQU "--dev" ( - set "CFG_REQUIREMENTS=%DEV_REQUIREMENTS%" - set CFG_DEV_MODE=1 -) - if "%1" EQU "--init" ( - set "NO_INDEX= " + if "%1" EQU "--help" (goto cli_help) + if "%1" EQU "--clean" (goto clean) + if "%1" EQU "--dev" ( + set "CFG_REQUIREMENTS=%DEV_REQUIREMENTS%" + ) + if "%1" EQU "--docs" ( + set "CFG_REQUIREMENTS=%DOCS_REQUIREMENTS%" ) shift goto again ) -set "PIP_EXTRA_ARGS=%PIP_EXTRA_ARGS% %NO_INDEX%" +set "PIP_EXTRA_ARGS=%PIP_EXTRA_ARGS%" @rem 
################################ -@rem # find a proper Python to run +@rem # Find a proper Python to run @rem # Use environment variables or a file if available. @rem # Otherwise the latest Python by default. if not defined PYTHON_EXECUTABLE ( @rem # check for a file named PYTHON_EXECUTABLE - if exist ""%CFG_ROOT_DIR%\PYTHON_EXECUTABLE"" ( - set /p PYTHON_EXECUTABLE=<""%CFG_ROOT_DIR%\PYTHON_EXECUTABLE"" + if exist "%CFG_ROOT_DIR%\PYTHON_EXECUTABLE" ( + set /p PYTHON_EXECUTABLE=<"%CFG_ROOT_DIR%\PYTHON_EXECUTABLE" ) else ( set "PYTHON_EXECUTABLE=py" ) ) + +@rem ################################ :create_virtualenv @rem # create a virtualenv for Python @rem # Note: we do not use the bundled Python 3 "venv" because its behavior and @rem # presence is not consistent across Linux distro and sometimes pip is not @rem # included either by default. The virtualenv.pyz app cures all these issues. -if not exist ""%CFG_BIN_DIR%\python.exe"" ( +if not exist "%CFG_BIN_DIR%\python.exe" ( if not exist "%CFG_BIN_DIR%" ( - mkdir %CFG_BIN_DIR% + mkdir "%CFG_BIN_DIR%" ) - if exist ""%CFG_ROOT_DIR%\etc\thirdparty\virtualenv.pyz"" ( + if exist "%CFG_ROOT_DIR%\etc\thirdparty\virtualenv.pyz" ( %PYTHON_EXECUTABLE% "%CFG_ROOT_DIR%\etc\thirdparty\virtualenv.pyz" ^ --wheel embed --pip embed --setuptools embed ^ --seeder pip ^ @@ -120,9 +122,9 @@ if not exist ""%CFG_BIN_DIR%\python.exe"" ( --no-periodic-update ^ --no-vcs-ignore ^ %CFG_QUIET% ^ - %CFG_ROOT_DIR%\%VIRTUALENV_DIR% + "%CFG_ROOT_DIR%\%VIRTUALENV_DIR%" ) else ( - if not exist ""%CFG_ROOT_DIR%\%VIRTUALENV_DIR%\virtualenv.pyz"" ( + if not exist "%CFG_ROOT_DIR%\%VIRTUALENV_DIR%\virtualenv.pyz" ( curl -o "%CFG_ROOT_DIR%\%VIRTUALENV_DIR%\virtualenv.pyz" %VIRTUALENV_PYZ_URL% if %ERRORLEVEL% neq 0 ( @@ -136,7 +138,7 @@ if not exist ""%CFG_BIN_DIR%\python.exe"" ( --no-periodic-update ^ --no-vcs-ignore ^ %CFG_QUIET% ^ - %CFG_ROOT_DIR%\%VIRTUALENV_DIR% + "%CFG_ROOT_DIR%\%VIRTUALENV_DIR%" ) ) @@ -145,6 +147,7 @@ if %ERRORLEVEL% neq 0 ( ) +@rem 
################################ :install_packages @rem # install requirements in virtualenv @rem # note: --no-build-isolation means that pip/wheel/setuptools will not @@ -152,15 +155,21 @@ if %ERRORLEVEL% neq 0 ( @rem # speeds up the installation. @rem # We always have the PEP517 build dependencies installed already. -%CFG_BIN_DIR%\pip install ^ +"%CFG_BIN_DIR%\pip" install ^ --upgrade ^ --no-build-isolation ^ %CFG_QUIET% ^ %PIP_EXTRA_ARGS% ^ %CFG_REQUIREMENTS% + +@rem ################################ +:create_bin_junction @rem # Create junction to bin to have the same directory between linux and windows -mklink /J %CFG_ROOT_DIR%\%VIRTUALENV_DIR%\bin %CFG_ROOT_DIR%\%VIRTUALENV_DIR%\Scripts +if exist "%CFG_ROOT_DIR%\%VIRTUALENV_DIR%\bin" ( + rmdir /s /q "%CFG_ROOT_DIR%\%VIRTUALENV_DIR%\bin" +) +mklink /J "%CFG_ROOT_DIR%\%VIRTUALENV_DIR%\bin" "%CFG_ROOT_DIR%\%VIRTUALENV_DIR%\Scripts" if %ERRORLEVEL% neq 0 ( exit /b %ERRORLEVEL% @@ -170,20 +179,15 @@ exit /b 0 @rem ################################ - :cli_help echo An initial configuration script echo " usage: configure [options]" echo " " echo The default is to configure for regular use. Use --dev for development. - echo Use the --init option if starting a new project and the project - echo dependencies are not available on thirdparty.aboutcode.org/pypi/ - echo and requirements.txt and/or requirements-dev.txt has not been generated. echo " " echo The options are: echo " --clean: clean built and installed files and exit." echo " --dev: configure the environment for development." - echo " --init: pull dependencies from PyPI. Used when first setting up a project." echo " --help: display this help message and exit." echo " " echo By default, the python interpreter version found in the path is used. @@ -194,6 +198,7 @@ exit /b 0 exit /b 0 +@rem ################################ :clean @rem # Remove cleanable file and directories and files from the root dir. echo "* Cleaning ..." 
diff --git a/docs/skeleton-usage.rst b/docs/skeleton-usage.rst deleted file mode 100644 index 7d16259..0000000 --- a/docs/skeleton-usage.rst +++ /dev/null @@ -1,157 +0,0 @@ -Usage -===== -A brand new project -------------------- -.. code-block:: bash - - git init my-new-repo - cd my-new-repo - git pull git@github.com:nexB/skeleton - - # Create the new repo on GitHub, then update your remote - git remote set-url origin git@github.com:nexB/your-new-repo.git - -From here, you can make the appropriate changes to the files for your specific project. - -Update an existing project ---------------------------- -.. code-block:: bash - - cd my-existing-project - git remote add skeleton git@github.com:nexB/skeleton - git fetch skeleton - git merge skeleton/main --allow-unrelated-histories - -This is also the workflow to use when updating the skeleton files in any given repository. - -Customizing ------------ - -You typically want to perform these customizations: - -- remove or update the src/README.rst and tests/README.rst files -- set project info and dependencies in setup.cfg -- check the configure and configure.bat defaults - -Initializing a project ----------------------- - -All projects using the skeleton will be expected to pull all of it dependencies -from thirdparty.aboutcode.org/pypi or the local thirdparty directory, using -requirements.txt and/or requirements-dev.txt to determine what version of a -package to collect. By default, PyPI will not be used to find and collect -packages from. - -In the case where we are starting a new project where we do not have -requirements.txt and requirements-dev.txt and whose dependencies are not yet on -thirdparty.aboutcode.org/pypi, we run the following command after adding and -customizing the skeleton files to your project: - -.. code-block:: bash - - ./configure --init - -This will initialize the virtual environment for the project, pull in the -dependencies from PyPI and add them to the virtual environment. 
- -Generating requirements.txt and requirements-dev.txt ----------------------------------------------------- - -After the project has been initialized, we can generate the requirements.txt and -requirements-dev.txt files. - -Ensure the virtual environment is enabled. - -.. code-block:: bash - - source venv/bin/activate - -To generate requirements.txt: - -.. code-block:: bash - - python etc/scripts/gen_requirements.py -s venv/lib/python/site-packages/ - -Replace \ with the version number of the Python being used, for example: ``venv/lib/python3.6/site-packages/`` - -To generate requirements-dev.txt after requirements.txt has been generated: - -.. code-block:: bash - ./configure --init --dev - python etc/scripts/gen_requirements_dev.py -s venv/lib/python/site-packages/ - -Note: on Windows, the ``site-packages`` directory is located at ``venv\Lib\site-packages\`` - -.. code-block:: bash - - python .\\etc\\scripts\\gen_requirements.py -s .\\venv\\Lib\\site-packages\\ - .\configure --init --dev - python .\\etc\\scripts\\gen_requirements_dev.py -s .\\venv\\Lib\\site-packages\\ - -Collecting and generating ABOUT files for dependencies ------------------------------------------------------- - -Ensure that the dependencies used by ``etc/scripts/bootstrap.py`` are installed: - -.. code-block:: bash - - pip install -r etc/scripts/requirements.txt - -Once we have requirements.txt and requirements-dev.txt, we can fetch the project -dependencies as wheels and generate ABOUT files for them: - -.. code-block:: bash - - python etc/scripts/bootstrap.py -r requirements.txt -r requirements-dev.txt --with-deps - -There may be issues with the generated ABOUT files, which will have to be -corrected. You can check to see if your corrections are valid by running: - -.. 
code-block:: bash - - python etc/scripts/check_thirdparty.py -d thirdparty - -Once the wheels are collected and the ABOUT files are generated and correct, -upload them to thirdparty.aboutcode.org/pypi by placing the wheels and ABOUT -files from the thirdparty directory to the pypi directory at -https://github.com/nexB/thirdparty-packages - - -Usage after project initialization ----------------------------------- - -Once the ``requirements.txt`` and ``requirements-dev.txt`` have been generated -and the project dependencies and their ABOUT files have been uploaded to -thirdparty.aboutcode.org/pypi, you can configure the project without using the -``--init`` option. - -If the virtual env for the project becomes polluted, or you would like to remove -it, use the ``--clean`` option: - -.. code-block:: bash - - ./configure --clean - -Then you can run ``./configure`` again to set up the project virtual environment. - -To set up the project for development use: - -.. code-block:: bash - - ./configure --dev - -To update the project dependencies (adding, removing, updating packages, etc.), -update the dependencies in ``setup.cfg``, then run: - -.. 
code-block:: bash - - ./configure --clean # Remove existing virtual environment - ./configure --init # Create project virtual environment, pull in new dependencies - source venv/bin/activate # Ensure virtual environment is activated - python etc/scripts/gen_requirements.py -s venv/lib/python/site-packages/ # Regenerate requirements.txt - python etc/scripts/gen_requirements_dev.py -s venv/lib/python/site-packages/ # Regenerate requirements-dev.txt - pip install -r etc/scripts/requirements.txt # Install dependencies needed by etc/scripts/bootstrap.py - python etc/scripts/bootstrap.py -r requirements.txt -r requirements-dev.txt --with-deps # Collect dependency wheels and their ABOUT files - -Ensure that the generated ABOUT files are valid, then take the dependency wheels -and ABOUT files and upload them to thirdparty.aboutcode.org/pypi. diff --git a/docs/source/_static/theme_overrides.css b/docs/source/_static/theme_overrides.css new file mode 100644 index 0000000..9662d63 --- /dev/null +++ b/docs/source/_static/theme_overrides.css @@ -0,0 +1,353 @@ +body { + color: #000000; +} + +p { + margin-bottom: 10px; +} + +.wy-plain-list-disc, .rst-content .section ul, .rst-content .toctree-wrapper ul, article ul { + margin-bottom: 10px; +} + +.custom_header_01 { + color: #cc0000; + font-size: 22px; + font-weight: bold; + line-height: 50px; +} + +h1, h2, h3, h4, h5, h6 { + margin-bottom: 20px; + margin-top: 20px; +} + +h5 { + font-size: 18px; + color: #000000; + font-style: italic; + margin-bottom: 10px; +} + +h6 { + font-size: 15px; + color: #000000; + font-style: italic; + margin-bottom: 10px; +} + +/* custom admonitions */ +/* success */ +.custom-admonition-success .admonition-title { + color: #000000; + background: #ccffcc; + border-radius: 5px 5px 0px 0px; +} +div.custom-admonition-success.admonition { + color: #000000; + background: #ffffff; + border: solid 1px #cccccc; + border-radius: 5px; + box-shadow: 1px 1px 5px 3px #d8d8d8; + margin: 20px 0px 30px 0px; +} + +/* 
important */ +.custom-admonition-important .admonition-title { + color: #000000; + background: #ccffcc; + border-radius: 5px 5px 0px 0px; + border-bottom: solid 1px #000000; +} +div.custom-admonition-important.admonition { + color: #000000; + background: #ffffff; + border: solid 1px #cccccc; + border-radius: 5px; + box-shadow: 1px 1px 5px 3px #d8d8d8; + margin: 20px 0px 30px 0px; +} + +/* caution */ +.custom-admonition-caution .admonition-title { + color: #000000; + background: #ffff99; + border-radius: 5px 5px 0px 0px; + border-bottom: solid 1px #e8e8e8; +} +div.custom-admonition-caution.admonition { + color: #000000; + background: #ffffff; + border: solid 1px #cccccc; + border-radius: 5px; + box-shadow: 1px 1px 5px 3px #d8d8d8; + margin: 20px 0px 30px 0px; +} + +/* note */ +.custom-admonition-note .admonition-title { + color: #ffffff; + background: #006bb3; + border-radius: 5px 5px 0px 0px; +} +div.custom-admonition-note.admonition { + color: #000000; + background: #ffffff; + border: solid 1px #cccccc; + border-radius: 5px; + box-shadow: 1px 1px 5px 3px #d8d8d8; + margin: 20px 0px 30px 0px; +} + +/* todo */ +.custom-admonition-todo .admonition-title { + color: #000000; + background: #cce6ff; + border-radius: 5px 5px 0px 0px; + border-bottom: solid 1px #99ccff; +} +div.custom-admonition-todo.admonition { + color: #000000; + background: #ffffff; + border: solid 1px #99ccff; + border-radius: 5px; + box-shadow: 1px 1px 5px 3px #d8d8d8; + margin: 20px 0px 30px 0px; +} + +/* examples */ +.custom-admonition-examples .admonition-title { + color: #000000; + background: #ffe6cc; + border-radius: 5px 5px 0px 0px; + border-bottom: solid 1px #d8d8d8; +} +div.custom-admonition-examples.admonition { + color: #000000; + background: #ffffff; + border: solid 1px #cccccc; + border-radius: 5px; + box-shadow: 1px 1px 5px 3px #d8d8d8; + margin: 20px 0px 30px 0px; +} + +.wy-nav-content { + max-width: 100%; + padding-right: 100px; + padding-left: 100px; + background-color: #f2f2f2; +} + 
+div.rst-content { + background-color: #ffffff; + border: solid 1px #e5e5e5; + padding: 20px 40px 20px 40px; +} + +.rst-content .guilabel { + border: 1px solid #ffff99; + background: #ffff99; + font-size: 100%; + font-weight: normal; + border-radius: 4px; + padding: 2px 0px; + margin: auto 2px; + vertical-align: middle; +} + +.rst-content kbd { + font-family: SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",Courier,monospace; + border: solid 1px #d8d8d8; + background-color: #f5f5f5; + padding: 0px 3px; + border-radius: 3px; +} + +.wy-nav-content-wrap a { + color: #0066cc; + text-decoration: none; +} +.wy-nav-content-wrap a:hover { + color: #0099cc; + text-decoration: underline; +} + +.wy-nav-top a { + color: #ffffff; +} + +/* Based on numerous similar approaches e.g., https://github.com/readthedocs/sphinx_rtd_theme/issues/117 and https://rackerlabs.github.io/docs-rackspace/tools/rtd-tables.html -- but remove form-factor limits to enable table wrap on full-size and smallest-size form factors */ +.wy-table-responsive table td { + white-space: normal !important; +} + +.rst-content table.docutils td, +.rst-content table.docutils th { + padding: 5px 10px 5px 10px; +} +.rst-content table.docutils td p, +.rst-content table.docutils th p { + font-size: 14px; + margin-bottom: 0px; +} +.rst-content table.docutils td p cite, +.rst-content table.docutils th p cite { + font-size: 14px; + background-color: transparent; +} + +.colwidths-given th { + border: solid 1px #d8d8d8 !important; +} +.colwidths-given td { + border: solid 1px #d8d8d8 !important; +} + +/*handles single-tick inline code*/ +.wy-body-for-nav cite { + color: #000000; + background-color: transparent; + font-style: normal; + font-family: "Courier New"; + font-size: 13px; + padding: 3px 3px 3px 3px; +} + +.rst-content pre.literal-block, .rst-content div[class^="highlight"] pre, .rst-content .linenodiv pre { + font-family: SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier 
New",Courier,monospace; + font-size: 13px; + overflow: visible; + white-space: pre-wrap; + color: #000000; +} + +.rst-content pre.literal-block, .rst-content div[class^='highlight'] { + background-color: #f8f8f8; + border: solid 1px #e8e8e8; +} + +/* This enables inline code to wrap. */ +code, .rst-content tt, .rst-content code { + white-space: pre-wrap; + padding: 2px 3px 1px; + border-radius: 3px; + font-size: 13px; + background-color: #ffffff; +} + +/* use this added class for code blocks attached to bulleted list items */ +.highlight-top-margin { + margin-top: 20px !important; +} + +/* change color of inline code block */ +span.pre { + color: #e01e5a; +} + +.wy-body-for-nav blockquote { + margin: 1em 0; + padding-left: 1em; + border-left: 4px solid #ddd; + color: #000000; +} + +/* Fix the unwanted top and bottom padding inside a nested bulleted/numbered list */ +.rst-content .section ol p, .rst-content .section ul p { + margin-bottom: 0px; +} + +/* add spacing between bullets for legibility */ +.rst-content .section ol li, .rst-content .section ul li { + margin-bottom: 5px; +} + +.rst-content .section ol li:first-child, .rst-content .section ul li:first-child { + margin-top: 5px; +} + +/* but exclude the toctree bullets */ +.rst-content .toctree-wrapper ul li, .rst-content .toctree-wrapper ul li:first-child { + margin-top: 0px; + margin-bottom: 0px; +} + +/* remove extra space at bottom of multine list-table cell */ +.rst-content .line-block { + margin-left: 0px; + margin-bottom: 0px; + line-height: 24px; +} + +/* fix extra vertical spacing in page toctree */ +.rst-content .toctree-wrapper ul li ul, article ul li ul { + margin-top: 0; + margin-bottom: 0; +} + +/* this is used by the genindex added via layout.html (see source/_templates/) to sidebar toc */ +.reference.internal.toc-index { + color: #d9d9d9; +} + +.reference.internal.toc-index.current { + background-color: #ffffff; + color: #000000; + font-weight: bold; +} + +.toc-index-div { + border-top: solid 
1px #000000; + margin-top: 10px; + padding-top: 5px; +} + +.indextable ul li { + font-size: 14px; + margin-bottom: 5px; +} + +/* The next 2 fix the poor vertical spacing in genindex.html (the alphabetized index) */ +.indextable.genindextable { + margin-bottom: 20px; +} + +div.genindex-jumpbox { + margin-bottom: 10px; +} + +/* rst image classes */ + +.clear-both { + clear: both; + } + +.float-left { + float: left; + margin-right: 20px; +} + +img { + border: solid 1px #e8e8e8; +} + +/* These are custom and need to be defined in conf.py to access in all pages, e.g., '.. role:: red' */ +.img-title { + color: #000000; + /* neither padding nor margin works for vertical spacing bc it's a span -- line-height does, sort of */ + line-height: 3.0; + font-style: italic; + font-weight: 600; +} + +.img-title-para { + color: #000000; + margin-top: 20px; + margin-bottom: 0px; + font-style: italic; + font-weight: 500; +} + +.red { + color: red; +} diff --git a/docs/source/conf.py b/docs/source/conf.py index 529cae3..112fa55 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -17,9 +17,9 @@ # -- Project information ----------------------------------------------------- -project = 'nexb-skeleton' -copyright = 'nexb Inc.' -author = 'nexb Inc.' +project = "ExtractCode" +copyright = "nexB Inc. and others." +author = "AboutCode.org authors and contributors" # -- General configuration --------------------------------------------------- @@ -28,10 +28,21 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. 
extensions = [ +'sphinx.ext.intersphinx', ] +# This points to aboutcode.readthedocs.io +# In case of "undefined label" ERRORS check docs on intersphinx to troubleshoot +# Link was created at commit - https://github.com/nexB/aboutcode/commit/faea9fcf3248f8f198844fe34d43833224ac4a83 + +intersphinx_mapping = { + 'aboutcode': ('https://aboutcode.readthedocs.io/en/latest/', None), + 'scancode-workbench': ('https://scancode-workbench.readthedocs.io/en/develop/', None), +} + + # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. @@ -44,20 +55,45 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'sphinx_rtd_theme' +html_theme = "sphinx_rtd_theme" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] + +master_doc = 'index' html_context = { - 'css_files': [ - '_static/theme_overrides.css', # override wide tables in RTD theme - ], "display_github": True, "github_user": "nexB", - "github_repo": "nexb-skeleton", - "github_version": "develop", # branch + "github_repo": "extractcode", + "github_version": "main", # branch "conf_py_path": "/docs/source/", # path in the checkout to the docs root - } \ No newline at end of file +} + +html_css_files = [ + '_static/theme_overrides.css' + ] + + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +html_show_sphinx = True + +# Define CSS and HTML abbreviations used in .rst files. These are examples. +# .. 
role:: is used to refer to styles defined in _static/theme_overrides.css and is used like this: :red:`text` +rst_prolog = """ +.. |psf| replace:: Python Software Foundation + +.. # define a hard line break for HTML +.. |br| raw:: html + +
+ +.. role:: red + +.. role:: img-title + +.. role:: img-title-para + +""" diff --git a/docs/source/contribute/contrib_doc.rst b/docs/source/contribute/contrib_doc.rst new file mode 100644 index 0000000..13882e1 --- /dev/null +++ b/docs/source/contribute/contrib_doc.rst @@ -0,0 +1,314 @@ +.. _contrib_doc_dev: + +Contributing to the Documentation +================================= + +.. _contrib_doc_setup_local: + +Setup Local Build +----------------- + +To get started, create or identify a working directory on your local machine. + +Open that directory and execute the following command in a terminal session:: + + git clone https://github.com/nexB/skeleton.git + +That will create a ``/skeleton`` directory in your working directory. +Now you can install the dependencies in a virtualenv:: + + cd skeleton + ./configure --docs + +.. note:: + + In case of Windows, run ``configure --docs`` instead of this. + +Now, this will install the following prerequisites: + +- Sphinx +- sphinx_rtd_theme (the format theme used by ReadTheDocs) +- doc8 (style linter) + +These requirements are already present in setup.cfg and `./configure --docs` installs them. + +Now you can build the HTML documents locally:: + + source venv/bin/activate + cd docs + make html + +Assuming that your Sphinx installation was successful, Sphinx should build a local instance of the +documentation .html files:: + + open build/html/index.html + +.. note:: + + In case this command did not work, for example on Ubuntu 18.04 you may get a message like “Couldn’t + get a file descriptor referring to the console”, try: + + :: + + see build/html/index.html + +You now have a local build of the AboutCode documents. + +.. _contrib_doc_share_improvements: + +Share Document Improvements +--------------------------- + +Ensure that you have the latest files:: + + git pull + git status + +Before committing changes run Continuous Integration Scripts locally to run tests. Refer +:ref:`doc_ci` for instructions on the same. 
+ +Follow standard git procedures to upload your new and modified files. The following commands are +examples:: + + git status + git add source/index.rst + git add source/how-to-scan.rst + git status + git commit -m "New how-to document that explains how to scan" + git status + git push + git status + +The Scancode-Toolkit webhook with ReadTheDocs should rebuild the documentation after your +Pull Request is Merged. + +Refer the `Pro Git Book `_ available online for Git tutorials +covering more complex topics on Branching, Merging, Rebasing etc. + +.. _doc_ci: + +Continuous Integration +---------------------- + +The documentations are checked on every new commit through Travis-CI, so that common errors are +avoided and documentation standards are enforced. Travis-CI presently checks for these 3 aspects +of the documentation : + +1. Successful Builds (By using ``sphinx-build``) +2. No Broken Links (By Using ``link-check``) +3. Linting Errors (By Using ``Doc8``) + +So run these scripts at your local system before creating a Pull Request:: + + cd docs + ./scripts/sphinx_build_link_check.sh + ./scripts/doc8_style_check.sh + +If you don't have permission to run the scripts, run:: + + chmod u+x ./scripts/doc8_style_check.sh + +.. _doc_style_docs8: + +Style Checks Using ``Doc8`` +--------------------------- + +How To Run Style Tests +^^^^^^^^^^^^^^^^^^^^^^ + +In the project root, run the following commands:: + + $ cd docs + $ ./scripts/doc8_style_check.sh + +A sample output is:: + + Scanning... + Validating... 
+ docs/source/misc/licence_policy_plugin.rst:37: D002 Trailing whitespace + docs/source/misc/faq.rst:45: D003 Tabulation used for indentation + docs/source/misc/faq.rst:9: D001 Line too long + docs/source/misc/support.rst:6: D005 No newline at end of file + ======== + Total files scanned = 34 + Total files ignored = 0 + Total accumulated errors = 326 + Detailed error counts: + - CheckCarriageReturn = 0 + - CheckIndentationNoTab = 75 + - CheckMaxLineLength = 190 + - CheckNewlineEndOfFile = 13 + - CheckTrailingWhitespace = 47 + - CheckValidity = 1 + +Now fix the errors and run again till there isn't any style error in the documentation. + +What is Checked? +^^^^^^^^^^^^^^^^ + +PyCQA is an Organization for code quality tools (and plugins) for the Python programming language. +Doc8 is a sub-project of the same Organization. Refer this `README `_ for more details. + +What is checked: + + - invalid rst format - D000 + - lines should not be longer than 100 characters - D001 + + - RST exception: line with no whitespace except in the beginning + - RST exception: lines with http or https URLs + - RST exception: literal blocks + - RST exception: rst target directives + + - no trailing whitespace - D002 + - no tabulation for indentation - D003 + - no carriage returns (use UNIX newlines) - D004 + - no newline at end of file - D005 + +.. _doc_interspinx: + +Intersphinx +----------- + +ScanCode toolkit documentation uses `Intersphinx `_ +to link to other Sphinx Documentations, to maintain links to other Aboutcode Projects. + +To link sections in the same documentation, standard reST labels are used. Refer +`Cross-Referencing `_ for more information. + +For example:: + + .. _my-reference-label: + + Section to cross-reference + -------------------------- + + This is the text of the section. + + It refers to the section itself, see :ref:`my-reference-label`. 
+ +Now, using Intersphinx, you can create these labels in one Sphinx Documentation and then reference +these labels from another Sphinx Documentation, hosted in different locations. + +You just have to add the following in the ``conf.py`` file for your Sphinx Documentation, where you +want to add the links:: + + extensions = [ + 'sphinx.ext.intersphinx' + ] + + intersphinx_mapping = {'aboutcode': ('https://aboutcode.readthedocs.io/en/latest/', None)} + +To show all Intersphinx links and their targets of an Intersphinx mapping file, run:: + + python -msphinx.ext.intersphinx https://aboutcode.readthedocs.io/en/latest/objects.inv + +.. WARNING:: + + ``python -msphinx.ext.intersphinx https://aboutcode.readthedocs.io/objects.inv`` will give + error. + +This enables you to create links to the ``aboutcode`` Documentation in your own Documentation, +where you modified the configuration file. Links can be added like this:: + + For more details refer :ref:`aboutcode:doc_style_guide`. + +You can also not use the ``aboutcode`` label assigned to all links from aboutcode.readthedocs.io, +if you don't have a label having the same name in your Sphinx Documentation. Example:: + + For more details refer :ref:`doc_style_guide`. + +If you have a label in your documentation which is also present in the documentation linked by +Intersphinx, and you link to that label, it will create a link to the local label. + +For more information, refer this tutorial named +`Using Intersphinx `_. + +.. _doc_style_conv: + +Style Conventions for the Documentation +--------------------------------------- + +1. Headings + + (`Refer `_) + Normally, there are no heading levels assigned to certain characters as the structure is + determined from the succession of headings. 
However, this convention is used in Python’s Style + Guide for documenting which you may follow: + + # with overline, for parts + + * with overline, for chapters + + =, for sections + + -, for subsections + + ^, for sub-subsections + + ", for paragraphs + +2. Heading Underlines + + Do not use underlines that are longer/shorter than the title headline itself. As in: + + :: + + Correct : + + Extra Style Checks + ------------------ + + Incorrect : + + Extra Style Checks + ------------------------ + +.. note:: + + Underlines shorter than the Title text generate Errors on sphinx-build. + + +3. Internal Links + + Using ``:ref:`` is advised over standard reStructuredText links to sections (like + ```Section title`_``) because it works across files, when section headings are changed, will + raise warnings if incorrect, and works for all builders that support cross-references. + However, external links are created by using the standard ```Section title`_`` method. + +4. Eliminate Redundancy + + If a section/file has to be repeated somewhere else, do not write the exact same section/file + twice. Use ``.. include:: ../README.rst`` instead. Here, ``../`` refers to the documentation + root, so file location can be used accordingly. This enables us to link documents from other + upstream folders. + +5. Using ``:ref:`` only when necessary + + Use ``:ref:`` to create internal links only when needed, i.e. it is referenced somewhere. + Do not create references for all the sections and then only reference some of them, because + this creates unnecessary references. This also generates ERROR in ``restructuredtext-lint``. + +6. Spelling + + You should check for spelling errors before you push changes. `Aspell `_ + is a GNU project Command Line tool you can use for this purpose. Download and install Aspell, + then execute ``aspell check `` for all the files changed. Be careful about not + changing commands or other stuff as Aspell gives prompts for a lot of them. 
Also delete the + temporary ``.bak`` files generated. Refer the `manual `_ for more + information on how to use. + +7. Notes and Warning Snippets + + Every ``Note`` and ``Warning`` sections are to be kept in ``rst_snippets/note_snippets/`` and + ``rst_snippets/warning_snippets/`` and then included to eliminate redundancy, as these are + frequently used in multiple files. + +Converting from Markdown +------------------------ + +If you want to convert a ``.md`` file to a ``.rst`` file, this `tool `_ +does it pretty well. You'd still have to clean up and check for errors as this contains a lot of +bugs. But this is definitely better than converting everything by yourself. + +This will be helpful in converting GitHub wiki's (Markdown Files) to reStructuredtext files for +Sphinx/ReadTheDocs hosting. diff --git a/docs/source/index.rst b/docs/source/index.rst index 67fcf21..68099e7 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -5,7 +5,7 @@ Welcome to nexb-skeleton's documentation! :maxdepth: 2 :caption: Contents: - skeleton/index + contribute/contrib_doc Indices and tables ================== diff --git a/docs/source/skeleton/index.rst b/docs/source/skeleton/index.rst deleted file mode 100644 index 7dfc6cb..0000000 --- a/docs/source/skeleton/index.rst +++ /dev/null @@ -1,15 +0,0 @@ -# Docs Structure Guide -# Rst docs - https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html -# -# 1. Place docs in folders under source for different sections -# 2. Link them by adding individual index files in each section -# to the main index, and then files for each section to their -# respective index files. -# 3. Use `.. include` statements to include other .rst files -# or part of them, or use hyperlinks to a section of the docs, -# to get rid of repetition. -# https://docutils.sourceforge.io/docs/ref/rst/directives.html#including-an-external-document-fragment -# -# Note: Replace these guide/placeholder docs - -.. 
include:: ../../../README.rst diff --git a/etc/ci/azure-container-deb.yml b/etc/ci/azure-container-deb.yml new file mode 100644 index 0000000..85b611d --- /dev/null +++ b/etc/ci/azure-container-deb.yml @@ -0,0 +1,50 @@ +parameters: + job_name: '' + container: '' + python_path: '' + python_version: '' + package_manager: apt-get + install_python: '' + install_packages: | + set -e -x + sudo apt-get -y update + sudo apt-get -y install \ + build-essential \ + xz-utils zlib1g bzip2 libbz2-1.0 tar \ + sqlite3 libxml2-dev libxslt1-dev \ + software-properties-common openssl + test_suite: '' + test_suite_label: '' + + +jobs: + - job: ${{ parameters.job_name }} + + pool: + vmImage: 'ubuntu-16.04' + + container: + image: ${{ parameters.container }} + options: '--name ${{ parameters.job_name }} -e LANG=C.UTF-8 -e LC_ALL=C.UTF-8 -v /usr/bin/docker:/tmp/docker:ro' + + steps: + - checkout: self + fetchDepth: 10 + + - script: /tmp/docker exec -t -e LANG=C.UTF-8 -e LC_ALL=C.UTF-8 -u 0 ${{ parameters.job_name }} $(Build.SourcesDirectory)/etc/ci/install_sudo.sh ${{ parameters.package_manager }} + displayName: Install sudo + + - script: ${{ parameters.install_packages }} + displayName: Install required packages + + - script: ${{ parameters.install_python }} + displayName: 'Install Python ${{ parameters.python_version }}' + + - script: ${{ parameters.python_path }} --version + displayName: 'Show Python version' + + - script: PYTHON_EXE=${{ parameters.python_path }} ./configure --dev + displayName: 'Run Configure' + + - script: ${{ parameters.test_suite }} + displayName: 'Run ${{ parameters.test_suite_label }} tests with py${{ parameters.python_version }} on ${{ parameters.job_name }}' diff --git a/etc/ci/azure-container-rpm.yml b/etc/ci/azure-container-rpm.yml new file mode 100644 index 0000000..1e6657d --- /dev/null +++ b/etc/ci/azure-container-rpm.yml @@ -0,0 +1,51 @@ +parameters: + job_name: '' + image_name: 'ubuntu-16.04' + container: '' + python_path: '' + python_version: '' + 
package_manager: yum + install_python: '' + install_packages: | + set -e -x + sudo yum groupinstall -y "Development Tools" + sudo yum install -y \ + openssl openssl-devel \ + sqlite-devel zlib-devel xz-devel bzip2-devel \ + bzip2 tar unzip zip \ + libxml2-devel libxslt-devel + test_suite: '' + test_suite_label: '' + + +jobs: + - job: ${{ parameters.job_name }} + + pool: + vmImage: ${{ parameters.image_name }} + + container: + image: ${{ parameters.container }} + options: '--name ${{ parameters.job_name }} -e LANG=C.UTF-8 -e LC_ALL=C.UTF-8 -v /usr/bin/docker:/tmp/docker:ro' + + steps: + - checkout: self + fetchDepth: 10 + + - script: /tmp/docker exec -t -e LANG=C.UTF-8 -e LC_ALL=C.UTF-8 -u 0 ${{ parameters.job_name }} $(Build.SourcesDirectory)/etc/ci/install_sudo.sh ${{ parameters.package_manager }} + displayName: Install sudo + + - script: ${{ parameters.install_packages }} + displayName: Install required packages + + - script: ${{ parameters.install_python }} + displayName: 'Install Python ${{ parameters.python_version }}' + + - script: ${{ parameters.python_path }} --version + displayName: 'Show Python version' + + - script: PYTHON_EXE=${{ parameters.python_path }} ./configure --dev + displayName: 'Run Configure' + + - script: ${{ parameters.test_suite }} + displayName: 'Run ${{ parameters.test_suite_label }} tests with py${{ parameters.python_version }} on ${{ parameters.job_name }}' diff --git a/etc/ci/azure-posix.yml b/etc/ci/azure-posix.yml index 0921d9b..9fdc7f1 100644 --- a/etc/ci/azure-posix.yml +++ b/etc/ci/azure-posix.yml @@ -13,10 +13,8 @@ jobs: strategy: matrix: - ${{ each pyver in parameters.python_versions }}: ${{ each tsuite in parameters.test_suites }}: - ${{ format('py{0} {1}', pyver, tsuite.key) }}: - python_version: ${{ pyver }} + ${{ tsuite.key }}: test_suite_label: ${{ tsuite.key }} test_suite: ${{ tsuite.value }} @@ -24,18 +22,18 @@ jobs: - checkout: self fetchDepth: 10 - - task: UsePythonVersion@0 - inputs: - versionSpec: '$(python_version)' 
- architecture: '${{ parameters.python_architecture }}' - displayName: 'Install Python $(python_version)' + - ${{ each pyver in parameters.python_versions }}: + - task: UsePythonVersion@0 + inputs: + versionSpec: '${{ pyver }}' + architecture: '${{ parameters.python_architecture }}' + displayName: '${{ pyver }} - Install Python' - - script: | - python3 --version - python$(python_version) --version - echo "python$(python_version)" > PYTHON_EXECUTABLE - ./configure --dev - displayName: 'Run Configure' + - script: | + python${{ pyver }} --version + echo "python${{ pyver }}" > PYTHON_EXECUTABLE + ./configure --clean && ./configure --dev + displayName: '${{ pyver }} - Configure' - - script: $(test_suite) - displayName: 'Run $(test_suite_label) tests with py$(python_version) on ${{ parameters.job_name }}' + - script: $(test_suite) + displayName: '${{ pyver }} - $(test_suite_label) on ${{ parameters.job_name }}' diff --git a/etc/ci/azure-win.yml b/etc/ci/azure-win.yml index 03d8927..26b4111 100644 --- a/etc/ci/azure-win.yml +++ b/etc/ci/azure-win.yml @@ -13,27 +13,27 @@ jobs: strategy: matrix: - ${{ each pyver in parameters.python_versions }}: ${{ each tsuite in parameters.test_suites }}: - ${{ format('py{0} {1}', pyver, tsuite.key) }}: - python_version: ${{ pyver }} + ${{ tsuite.key }}: test_suite_label: ${{ tsuite.key }} test_suite: ${{ tsuite.value }} + steps: - checkout: self fetchDepth: 10 - - task: UsePythonVersion@0 - inputs: - versionSpec: '$(python_version)' - architecture: '${{ parameters.python_architecture }}' - displayName: 'Install Python $(python_version)' + - ${{ each pyver in parameters.python_versions }}: + - task: UsePythonVersion@0 + inputs: + versionSpec: '${{ pyver }}' + architecture: '${{ parameters.python_architecture }}' + displayName: '${{ pyver }} - Install Python' - - script: | - python --version - echo | set /p=python> PYTHON_EXECUTABLE - configure --dev - displayName: 'Run Configure' + - script: | + python --version + echo | set /p=python> 
PYTHON_EXECUTABLE + configure --clean && configure --dev + displayName: '${{ pyver }} - Configure' - - script: $(test_suite) - displayName: 'Run $(test_suite_label) tests with py$(python_version) on ${{ parameters.job_name }}' + - script: $(test_suite) + displayName: '${{ pyver }} - $(test_suite_label) on ${{ parameters.job_name }}' diff --git a/etc/ci/install_sudo.sh b/etc/ci/install_sudo.sh new file mode 100644 index 0000000..77f4210 --- /dev/null +++ b/etc/ci/install_sudo.sh @@ -0,0 +1,15 @@ +#!/bin/bash +set -e + + +if [[ "$1" == "apt-get" ]]; then + apt-get update -y + apt-get -o DPkg::Options::="--force-confold" install -y sudo + +elif [[ "$1" == "yum" ]]; then + yum install -y sudo + +elif [[ "$1" == "dnf" ]]; then + dnf install -y sudo + +fi diff --git a/etc/ci/macports-ci b/etc/ci/macports-ci new file mode 100644 index 0000000..ac474e4 --- /dev/null +++ b/etc/ci/macports-ci @@ -0,0 +1,304 @@ +#! /bin/bash + +# Copyright (c) 2019 Giovanni Bussi + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +export COLUMNS=80 + +if [ "$GITHUB_ACTIONS" = true ] ; then + echo "COLUMNS=$COLUMNS" >> "$GITHUB_ENV" +fi + +# file to be source at the end of subshell: +export MACPORTS_CI_SOURCEME="$(mktemp)" + +( +# start subshell +# this allows to use the script in two ways: +# 1. as ./macports-ci +# 2. as source ./macports-ci +# as of now, choice 2 only changes the env var COLUMNS. + +MACPORTS_VERSION=2.6.4 +MACPORTS_PREFIX=/opt/local +MACPORTS_SYNC=tarball + +action=$1 +shift + +case "$action" in +(install) + +echo "macports-ci: install" + +KEEP_BREW=yes + +for opt +do + case "$opt" in + (--source) SOURCE=yes ;; + (--binary) SOURCE=no ;; + (--keep-brew) KEEP_BREW=yes ;; + (--remove-brew) KEEP_BREW=no ;; + (--version=*) MACPORTS_VERSION="${opt#--version=}" ;; + (--prefix=*) MACPORTS_PREFIX="${opt#--prefix=}" ;; + (--sync=*) MACPORTS_SYNC="${opt#--sync=}" ;; + (*) echo "macports-ci: unknown option $opt" + exit 1 ;; + esac +done + +if test "$KEEP_BREW" = no ; then + echo "macports-ci: removing homebrew" + pushd "$(mktemp -d)" + curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/uninstall > uninstall + chmod +x uninstall + ./uninstall --force + popd +else + echo "macports-ci: keeping HomeBrew" +fi + +echo "macports-ci: prefix=$MACPORTS_PREFIX" + +if test "$MACPORTS_PREFIX" != /opt/local ; then + echo "macports-ci: Installing on non standard prefix $MACPORTS_PREFIX can be only made from sources" + SOURCE=yes +fi + +if test "$SOURCE" = yes ; then + echo "macports-ci: Installing from source" +else + echo "macports-ci: Installing from binary" +fi + +echo "macports-ci: Sync mode=$MACPORTS_SYNC" + +pushd "$(mktemp -d)" + +OSX_VERSION="$(sw_vers -productVersion | grep -o 
'^[0-9][0-9]*\.[0-9][0-9]*')" + +if test "$OSX_VERSION" == 10.10 ; then + OSX_NAME=Yosemite +elif test "$OSX_VERSION" == 10.11 ; then + OSX_NAME=ElCapitan +elif test "$OSX_VERSION" == 10.12 ; then + OSX_NAME=Sierra +elif test "$OSX_VERSION" == 10.13 ; then + OSX_NAME=HighSierra +elif test "$OSX_VERSION" == 10.14 ; then + OSX_NAME=Mojave +elif test "$OSX_VERSION" == 10.15 ; then + OSX_NAME=Catalina +else + echo "macports-ci: Unknown OSX version $OSX_VERSION" + exit 1 +fi + +echo "macports-ci: OSX version $OSX_VERSION $OSX_NAME" + +MACPORTS_PKG=MacPorts-${MACPORTS_VERSION}-${OSX_VERSION}-${OSX_NAME}.pkg + +# this is a workaround needed because binary installer MacPorts-2.6.3-10.12-Sierra.pkg is broken +if [ "$SOURCE" != yes ] && [ "$MACPORTS_PKG" = "MacPorts-2.6.3-10.12-Sierra.pkg" ] ; then + echo "macports-ci: WARNING $MACPORTS_PKG installer is broken" + echo "macports-ci: reverting to 2.6.2 installer followed by selfupdate" + MACPORTS_VERSION=2.6.2 + MACPORTS_PKG=MacPorts-${MACPORTS_VERSION}-${OSX_VERSION}-${OSX_NAME}.pkg +fi + +URL="https://distfiles.macports.org/MacPorts" +URL="https://github.com/macports/macports-base/releases/download/v$MACPORTS_VERSION/" + +echo "macports-ci: Base URL is $URL" + +if test "$SOURCE" = yes ; then +# download source: + curl -LO $URL/MacPorts-${MACPORTS_VERSION}.tar.bz2 + tar xjf MacPorts-${MACPORTS_VERSION}.tar.bz2 + cd MacPorts-${MACPORTS_VERSION} +# install + ./configure --prefix="$MACPORTS_PREFIX" --with-applications-dir="$MACPORTS_PREFIX/Applications" >/dev/null && + sudo make install >/dev/null +else + +# download installer: + curl -LO $URL/$MACPORTS_PKG +# install: + sudo installer -verbose -pkg $MACPORTS_PKG -target / +fi + +# update: +export PATH="$MACPORTS_PREFIX/bin:$PATH" + +echo "PATH=\"$MACPORTS_PREFIX/bin:\$PATH\"" > "$MACPORTS_CI_SOURCEME" + +if [ "$GITHUB_ACTIONS" = true ] ; then + echo "$MACPORTS_PREFIX/bin" >> "$GITHUB_PATH" +fi + + +SOURCES="${MACPORTS_PREFIX}"/etc/macports/sources.conf + +case "$MACPORTS_SYNC" 
in +(rsync) + echo "macports-ci: Using rsync" + ;; +(github) + echo "macports-ci: Using github" + pushd "$MACPORTS_PREFIX"/var/macports/sources + sudo mkdir -p github.com/macports/macports-ports/ + sudo chown -R $USER:admin github.com + git clone https://github.com/macports/macports-ports.git github.com/macports/macports-ports/ + awk '{if($NF=="[default]") print "file:///opt/local/var/macports/sources/github.com/macports/macports-ports/"; else print}' "$SOURCES" > $HOME/$$.tmp + sudo mv -f $HOME/$$.tmp "$SOURCES" + popd + ;; +(tarball) + echo "macports-ci: Using tarball" + awk '{if($NF=="[default]") print "https://distfiles.macports.org/ports.tar.gz [default]"; else print}' "$SOURCES" > $$.tmp + sudo mv -f $$.tmp "$SOURCES" + ;; +(*) + echo "macports-ci: Unknown sync mode $MACPORTS_SYNC" + ;; +esac + +i=1 +# run through a while to retry upon failure +while true +do + echo "macports-ci: Trying to selfupdate (iteration $i)" +# here I test for the presence of a known portfile +# this check confirms that ports were installed +# notice that port -N selfupdate && break is not sufficient as a test +# (sometime it returns a success even though ports have not been installed) +# for some misterious reasons, running without "-d" does not work in some case + sudo port -d -N selfupdate 2>&1 | grep -v DEBUG | awk '{if($1!="x")print}' + port info xdrfile > /dev/null && break || true + sleep 5 + i=$((i+1)) + if ((i>20)) ; then + echo "macports-ci: Failed after $i iterations" + exit 1 + fi +done + +echo "macports-ci: Selfupdate successful after $i iterations" + +dir="$PWD" +popd +sudo rm -fr $dir + +;; + +(localports) + +echo "macports-ci: localports" + +for opt +do + case "$opt" in + (*) ports="$opt" ;; + esac +done + +if ! 
test -d "$ports" ; then + echo "macports-ci: Please provide a port directory" + exit 1 +fi + +w=$(which port) + +MACPORTS_PREFIX="${w%/bin/port}" + +cd "$ports" + +ports="$(pwd)" + +echo "macports-ci: Portdir fullpath: $ports" +SOURCES="${MACPORTS_PREFIX}"/etc/macports/sources.conf + +awk -v repo="file://$ports" '{if($NF=="[default]") print repo; print}' "$SOURCES" > $$.tmp +sudo mv -f $$.tmp "$SOURCES" + +portindex + +;; + +(ccache) +w=$(which port) +MACPORTS_PREFIX="${w%/bin/port}" + +echo "macports-ci: ccache" + +ccache_do=install + +for opt +do + case "$opt" in + (--save) ccache_do=save ;; + (--install) ccache_do=install ;; + (*) echo "macports-ci: ccache: unknown option $opt" + exit 1 ;; + esac +done + + +case "$ccache_do" in +(install) +# first install ccache +sudo port -N install ccache +# then tell macports to use it +CONF="${MACPORTS_PREFIX}"/etc/macports/macports.conf +awk '{if(match($0,"configureccache")) print "configureccache yes" ; else print }' "$CONF" > $$.tmp +sudo mv -f $$.tmp "$CONF" + +# notice that cache size is set to 512Mb, same as it is set by Travis-CI on linux +# might be changed in the future +test -f "$HOME"/.macports-ci-ccache/ccache.conf && + sudo rm -fr "$MACPORTS_PREFIX"/var/macports/build/.ccache && + sudo mkdir -p "$MACPORTS_PREFIX"/var/macports/build/.ccache && + sudo cp -a "$HOME"/.macports-ci-ccache/* "$MACPORTS_PREFIX"/var/macports/build/.ccache/ && + sudo echo "max_size = 512M" > "$MACPORTS_PREFIX"/var/macports/build/.ccache/ccache.conf && + sudo chown -R macports:admin "$MACPORTS_PREFIX"/var/macports/build/.ccache + +;; +(save) + +sudo rm -fr "$HOME"/.macports-ci-ccache +sudo mkdir -p "$HOME"/.macports-ci-ccache +sudo cp -a "$MACPORTS_PREFIX"/var/macports/build/.ccache/* "$HOME"/.macports-ci-ccache/ + +esac + +CCACHE_DIR="$MACPORTS_PREFIX"/var/macports/build/.ccache/ ccache -s + +;; + +(*) +echo "macports-ci: unknown action $action" + +esac + +) + +# allows setting env var if necessary: +source "$MACPORTS_CI_SOURCEME" diff 
--git a/etc/ci/macports-ci.ABOUT b/etc/ci/macports-ci.ABOUT new file mode 100644 index 0000000..60a11f8 --- /dev/null +++ b/etc/ci/macports-ci.ABOUT @@ -0,0 +1,16 @@ +about_resource: macports-ci +name: macports-ci +version: c9676e67351a3a519e37437e196cd0ee9c2180b8 +download_url: https://raw.githubusercontent.com/GiovanniBussi/macports-ci/c9676e67351a3a519e37437e196cd0ee9c2180b8/macports-ci +description: Simplify MacPorts setup on Travis-CI +homepage_url: https://github.com/GiovanniBussi/macports-ci +license_expression: mit +copyright: Copyright (c) Giovanni Bussi +attribute: yes +checksum_md5: 5d31d479132502f80acdaed78bed9e23 +checksum_sha1: 74b15643bd1a528d91b4a7c2169c6fc656f549c2 +package_url: pkg:github/giovannibussi/macports-ci@c9676e67351a3a519e37437e196cd0ee9c2180b8#macports-ci +licenses: + - key: mit + name: MIT License + file: mit.LICENSE diff --git a/etc/ci/mit.LICENSE b/etc/ci/mit.LICENSE new file mode 100644 index 0000000..e662c78 --- /dev/null +++ b/etc/ci/mit.LICENSE @@ -0,0 +1,5 @@ +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/etc/scripts/README.rst b/etc/scripts/README.rst index 4cb6ec7..edf82e4 100755 --- a/etc/scripts/README.rst +++ b/etc/scripts/README.rst @@ -1,10 +1,6 @@ -This directory contains the tools to: - -- manage a directory of thirdparty Python package source, wheels and metadata: - pin, build, update, document and publish to a PyPI-like repo (GitHub release) - -- build and publish scancode releases as wheel, sources and OS-specific bundles. - +This directory contains the tools to manage a directory of thirdparty Python +package source, wheels and metadata pin, build, update, document and publish to +a PyPI-like repo (GitHub release). NOTE: These are tested to run ONLY on Linux. @@ -19,10 +15,10 @@ Pre-requisites * To generate or update pip requirement files, you need to start with a clean virtualenv as instructed below (This is to avoid injecting requirements - specific to the tools here in the main requirements). + specific to the tools used here in the main requirements). * For other usages, the tools here can run either in their own isolated - virtualenv best or in the the main configured development virtualenv. + virtualenv or in the the main configured development virtualenv. These requireements need to be installed:: pip install --requirement etc/release/requirements.txt @@ -38,7 +34,7 @@ Scripts ~~~~~~~ **gen_requirements.py**: create/update requirements files from currently - installed requirements. + installed requirements. **gen_requirements_dev.py** does the same but can subtract the main requirements to get extra requirements used in only development. 
@@ -86,45 +82,14 @@ Populate a thirdparty directory with wheels, sources, .ABOUT and license files Scripts ~~~~~~~ -* **fetch_requirements.py** will fetch package wheels, their ABOUT, LICENSE and - NOTICE files to populate a local a thirdparty directory strictly from our - remote repo and using only pinned packages listed in one or more pip - requirements file(s). Fetch only requirements for specific python versions and - operating systems. Optionally fetch the corresponding source distributions. - -* **publish_files.py** will upload/sync a thirdparty directory of files to our - remote repo. Requires a GitHub personal access token. - -* **build_wheels.py** will build a package binary wheel for multiple OS and - python versions. Optionally wheels that contain native code are built - remotely. Dependent wheels are optionally included. Requires Azure credentials - and tokens if building wheels remotely on multiple operatin systems. - -* **fix_thirdparty.py** will fix a thirdparty directory with a best effort to - add missing wheels, sources archives, create or fetch or fix .ABOUT, .NOTICE - and .LICENSE files. Requires Azure credentials and tokens if requesting the - build of missing wheels remotely on multiple operatin systems. +* **fetch_thirdparty.py** will fetch package wheels, source sdist tarballs + and their ABOUT, LICENSE and NOTICE files to populate a local directory from + a list of PyPI simple URLs (typically PyPI.org proper and our self-hosted PyPI) + using pip requirements file(s), specifiers or pre-existing packages files. + Fetch wheels for specific python version and operating system combinations. * **check_thirdparty.py** will check a thirdparty directory for errors. -* **bootstrap.py** will bootstrap a thirdparty directory from a requirements - file(s) to add or build missing wheels, sources archives and create .ABOUT, - .NOTICE and .LICENSE files. 
Requires Azure credentials and tokens if - requesting the build of missing wheels remotely on multiple operatin systems. - - - -Usage -~~~~~ - -See each command line --help option for details. - -* (TODO) **add_package.py** will add or update a Python package including wheels, - sources and ABOUT files and this for multiple Python version and OSes(for use - with upload_packages.py afterwards) You will need an Azure personal access - token for buidling binaries and an optional DejaCode API key to post and fetch - new package versions there. TODO: explain how we use romp - Upgrade virtualenv app ---------------------- diff --git a/etc/scripts/bootstrap.py b/etc/scripts/bootstrap.py deleted file mode 100644 index fde505b..0000000 --- a/etc/scripts/bootstrap.py +++ /dev/null @@ -1,212 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# ScanCode is a trademark of nexB Inc. -# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/skeleton for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. 
-# - -import itertools - -import click - -import utils_thirdparty -from utils_thirdparty import Environment -from utils_thirdparty import PypiPackage - - -@click.command() - -@click.option('-r', '--requirements-file', - type=click.Path(exists=True, readable=True, path_type=str, dir_okay=False), - metavar='FILE', - multiple=True, - default=['requirements.txt'], - show_default=True, - help='Path to the requirements file(s) to use for thirdparty packages.', -) -@click.option('-d', '--thirdparty-dir', - type=click.Path(exists=True, readable=True, path_type=str, file_okay=False), - metavar='DIR', - default=utils_thirdparty.THIRDPARTY_DIR, - show_default=True, - help='Path to the thirdparty directory where wheels are built and ' - 'sources, ABOUT and LICENSE files fetched.', -) -@click.option('-p', '--python-version', - type=click.Choice(utils_thirdparty.PYTHON_VERSIONS), - metavar='PYVER', - default=utils_thirdparty.PYTHON_VERSIONS, - show_default=True, - multiple=True, - help='Python version(s) to use for this build.', -) -@click.option('-o', '--operating-system', - type=click.Choice(utils_thirdparty.PLATFORMS_BY_OS), - metavar='OS', - default=tuple(utils_thirdparty.PLATFORMS_BY_OS), - multiple=True, - show_default=True, - help='OS(ses) to use for this build: one of linux, mac or windows.', -) -@click.option('-l', '--latest-version', - is_flag=True, - help='Get the latest version of all packages, ignoring version specifiers.', -) -@click.option('--sync-dejacode', - is_flag=True, - help='Synchronize packages with DejaCode.', -) -@click.option('--with-deps', - is_flag=True, - help='Also include all dependent wheels.', -) -@click.help_option('-h', '--help') -def bootstrap( - requirements_file, - thirdparty_dir, - python_version, - operating_system, - with_deps, - latest_version, - sync_dejacode, - build_remotely=False, -): - """ - Boostrap a thirdparty Python packages directory from pip requirements. 
- - Fetch or build to THIRDPARTY_DIR all the wheels and source distributions for - the pip ``--requirement-file`` requirements FILE(s). Build wheels compatible - with all the provided ``--python-version`` PYVER(s) and ```--operating_system`` - OS(s) defaulting to all supported combinations. Create or fetch .ABOUT and - .LICENSE files. - - Optionally ignore version specifiers and use the ``--latest-version`` - of everything. - - Sources and wheels are fetched with attempts first from PyPI, then our remote repository. - If missing wheels are built as needed. - """ - # rename variables for clarity since these are lists - requirements_files = requirements_file - python_versions = python_version - operating_systems = operating_system - - # create the environments we need - evts = itertools.product(python_versions, operating_systems) - environments = [Environment.from_pyver_and_os(pyv, os) for pyv, os in evts] - - # collect all packages to process from requirements files - # this will fail with an exception if there are packages we cannot find - - required_name_versions = set() - - for req_file in requirements_files: - nvs = utils_thirdparty.load_requirements( - requirements_file=req_file, force_pinned=False) - required_name_versions.update(nvs) - if latest_version: - required_name_versions = set((name, None) for name, _ver in required_name_versions) - - print(f'PROCESSING {len(required_name_versions)} REQUIREMENTS in {len(requirements_files)} FILES') - - # fetch all available wheels, keep track of missing - # start with local, then remote, then PyPI - - print('==> COLLECTING ALREADY LOCALLY AVAILABLE REQUIRED WHEELS') - # list of all the wheel filenames either pre-existing, fetched or built - # updated as we progress - available_wheel_filenames = [] - - local_packages_by_namever = { - (p.name, p.version): p - for p in utils_thirdparty.get_local_packages(directory=thirdparty_dir) - } - - # list of (name, version, environment) not local and to fetch - 
name_version_envt_to_fetch = [] - - # start with a local check - for (name, version), envt in itertools.product(required_name_versions, environments): - local_pack = local_packages_by_namever.get((name, version,)) - if local_pack: - supported_wheels = list(local_pack.get_supported_wheels(environment=envt)) - if supported_wheels: - available_wheel_filenames.extend(w.filename for w in supported_wheels) - print(f'====> No fetch or build needed. ' - f'Local wheel already available for {name}=={version} ' - f'on os: {envt.operating_system} for Python: {envt.python_version}') - continue - - name_version_envt_to_fetch.append((name, version, envt,)) - - print(f'==> TRYING TO FETCH #{len(name_version_envt_to_fetch)} REQUIRED WHEELS') - - # list of (name, version, environment) not fetch and to build - name_version_envt_to_build = [] - - # then check if the wheel can be fetched without building from remote and Pypi - for name, version, envt in name_version_envt_to_fetch: - - fetched_fwn = utils_thirdparty.fetch_package_wheel( - name=name, - version=version, - environment=envt, - dest_dir=thirdparty_dir, - ) - - if fetched_fwn: - available_wheel_filenames.append(fetched_fwn) - else: - name_version_envt_to_build.append((name, version, envt,)) - - # At this stage we have all the wheels we could obtain without building - for name, version, envt in name_version_envt_to_build: - print(f'====> Need to build wheels for {name}=={version} on os: ' - f'{envt.operating_system} for Python: {envt.python_version}') - - packages_and_envts_to_build = [ - (PypiPackage(name, version), envt) - for name, version, envt in name_version_envt_to_build - ] - - print(f'==> BUILDING #{len(packages_and_envts_to_build)} MISSING WHEELS') - - package_envts_not_built, wheel_filenames_built = utils_thirdparty.build_missing_wheels( - packages_and_envts=packages_and_envts_to_build, - build_remotely=build_remotely, - with_deps=with_deps, - dest_dir=thirdparty_dir, -) - if wheel_filenames_built: - 
available_wheel_filenames.extend(available_wheel_filenames) - - for pack, envt in package_envts_not_built: - print( - f'====> FAILED to build any wheel for {pack.name}=={pack.version} ' - f'on os: {envt.operating_system} for Python: {envt.python_version}' - ) - - print(f'==> FETCHING SOURCE DISTRIBUTIONS') - # fetch all sources, keep track of missing - # This is a list of (name, version) - utils_thirdparty.fetch_missing_sources(dest_dir=thirdparty_dir) - - print(f'==> FETCHING ABOUT AND LICENSE FILES') - utils_thirdparty.add_fetch_or_update_about_and_license_files(dest_dir=thirdparty_dir) - - ############################################################################ - if sync_dejacode: - print(f'==> SYNC WITH DEJACODE') - # try to fetch from DejaCode any missing ABOUT - # create all missing DejaCode packages - pass - - utils_thirdparty.find_problems(dest_dir=thirdparty_dir) - - -if __name__ == '__main__': - bootstrap() diff --git a/etc/scripts/build_wheels.py b/etc/scripts/build_wheels.py deleted file mode 100644 index 352b705..0000000 --- a/etc/scripts/build_wheels.py +++ /dev/null @@ -1,97 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# ScanCode is a trademark of nexB Inc. -# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/skeleton for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. 
-# -import click - -import utils_thirdparty - - -@click.command() - -@click.option('-n', '--name', - type=str, - metavar='PACKAGE_NAME', - required=True, - help='Python package name to add or build.', -) -@click.option('-v', '--version', - type=str, - default=None, - metavar='VERSION', - help='Python package version to add or build.', -) -@click.option('-d', '--thirdparty-dir', - type=click.Path(exists=True, readable=True, path_type=str, file_okay=False), - metavar='DIR', - default=utils_thirdparty.THIRDPARTY_DIR, - show_default=True, - help='Path to the thirdparty directory where wheels are built.', -) -@click.option('-p', '--python-version', - type=click.Choice(utils_thirdparty.PYTHON_VERSIONS), - metavar='PYVER', - default=utils_thirdparty.PYTHON_VERSIONS, - show_default=True, - multiple=True, - help='Python version to use for this build.', -) -@click.option('-o', '--operating-system', - type=click.Choice(utils_thirdparty.PLATFORMS_BY_OS), - metavar='OS', - default=tuple(utils_thirdparty.PLATFORMS_BY_OS), - multiple=True, - show_default=True, - help='OS to use for this build: one of linux, mac or windows.', -) -@click.option('--build-remotely', - is_flag=True, - help='Build missing wheels remotely.', -) -@click.option('--with-deps', - is_flag=True, - help='Also include all dependent wheels.', -) -@click.option('--verbose', - is_flag=True, - help='Provide verbose output.', -) -@click.help_option('-h', '--help') -def build_wheels( - name, - version, - thirdparty_dir, - python_version, - operating_system, - with_deps, - build_remotely, - verbose, -): - """ - Build to THIRDPARTY_DIR all the wheels for the Python PACKAGE_NAME and - optional VERSION. Build wheels compatible with all the `--python-version` - PYVER(s) and `--operating_system` OS(s). - - Build native wheels remotely if needed when `--build-remotely` and include - all dependencies with `--with-deps`. 
- """ - utils_thirdparty.add_or_upgrade_built_wheels( - name=name, - version=version, - python_versions=python_version, - operating_systems=operating_system, - dest_dir=thirdparty_dir, - build_remotely=build_remotely, - with_deps=with_deps, - verbose=verbose, - ) - - -if __name__ == '__main__': - build_wheels() diff --git a/etc/scripts/check_thirdparty.py b/etc/scripts/check_thirdparty.py index e48cfce..b052f25 100644 --- a/etc/scripts/check_thirdparty.py +++ b/etc/scripts/check_thirdparty.py @@ -14,19 +14,42 @@ @click.command() - -@click.option('-d', '--thirdparty-dir', +@click.option( + "-d", + "--dest", type=click.Path(exists=True, readable=True, path_type=str, file_okay=False), required=True, - help='Path to the thirdparty directory to check.', + help="Path to the thirdparty directory to check.", +) +@click.option( + "-w", + "--wheels", + is_flag=True, + help="Check missing wheels.", +) +@click.option( + "-s", + "--sdists", + is_flag=True, + help="Check missing source sdists tarballs.", ) -@click.help_option('-h', '--help') -def check_thirdparty_dir(thirdparty_dir): +@click.help_option("-h", "--help") +def check_thirdparty_dir( + dest, + wheels, + sdists, +): """ - Check a thirdparty directory for problems. + Check a thirdparty directory for problems and print these on screen. """ - utils_thirdparty.find_problems(dest_dir=thirdparty_dir) + # check for problems + print(f"==> CHECK FOR PROBLEMS") + utils_thirdparty.find_problems( + dest_dir=dest, + report_missing_sources=sdists, + report_missing_wheels=wheels, + ) -if __name__ == '__main__': +if __name__ == "__main__": check_thirdparty_dir() diff --git a/etc/scripts/fetch_requirements.py b/etc/scripts/fetch_requirements.py deleted file mode 100644 index 21de865..0000000 --- a/etc/scripts/fetch_requirements.py +++ /dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# ScanCode is a trademark of nexB Inc. 
-# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/skeleton for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. -# -import itertools - -import click - -import utils_thirdparty - - -@click.command() - -@click.option('-r', '--requirements-file', - type=click.Path(exists=True, readable=True, path_type=str, dir_okay=False), - metavar='FILE', - multiple=True, - default=['requirements.txt'], - show_default=True, - help='Path to the requirements file to use for thirdparty packages.', -) -@click.option('-d', '--thirdparty-dir', - type=click.Path(exists=True, readable=True, path_type=str, file_okay=False), - metavar='DIR', - default=utils_thirdparty.THIRDPARTY_DIR, - show_default=True, - help='Path to the thirdparty directory.', -) -@click.option('-p', '--python-version', - type=click.Choice(utils_thirdparty.PYTHON_VERSIONS), - metavar='INT', - multiple=True, - default=['36'], - show_default=True, - help='Python version to use for this build.', -) -@click.option('-o', '--operating-system', - type=click.Choice(utils_thirdparty.PLATFORMS_BY_OS), - metavar='OS', - multiple=True, - default=['linux'], - show_default=True, - help='OS to use for this build: one of linux, mac or windows.', -) -@click.option('-s', '--with-sources', - is_flag=True, - help='Fetch the corresponding source distributions.', -) -@click.option('-a', '--with-about', - is_flag=True, - help='Fetch the corresponding ABOUT and LICENSE files.', -) -@click.option('--allow-unpinned', - is_flag=True, - help='Allow requirements without pinned versions.', -) -@click.option('-s', '--only-sources', - is_flag=True, - help='Fetch only the corresponding source distributions.', -) -@click.option('-u', '--remote-links-url', - type=str, - metavar='URL', - default=utils_thirdparty.REMOTE_LINKS_URL, - show_default=True, - help='URL to a PyPI-like links web site. 
' - 'Or local path to a directory with wheels.', -) - -@click.help_option('-h', '--help') -def fetch_requirements( - requirements_file, - thirdparty_dir, - python_version, - operating_system, - with_sources, - with_about, - allow_unpinned, - only_sources, - remote_links_url=utils_thirdparty.REMOTE_LINKS_URL, -): - """ - Fetch and save to THIRDPARTY_DIR all the required wheels for pinned - dependencies found in the `--requirement` FILE requirements file(s). Only - fetch wheels compatible with the provided `--python-version` and - `--operating-system`. - Also fetch the corresponding .ABOUT, .LICENSE and .NOTICE files together - with a virtualenv.pyz app. - - Use exclusively wheel not from PyPI but rather found in the PyPI-like link - repo ``remote_links_url`` if this is a URL. Treat this ``remote_links_url`` - as a local directory path to a wheels directory if this is not a a URL. - """ - - # fetch wheels - python_versions = python_version - operating_systems = operating_system - requirements_files = requirements_file - - if not only_sources: - envs = itertools.product(python_versions, operating_systems) - envs = (utils_thirdparty.Environment.from_pyver_and_os(pyv, os) for pyv, os in envs) - - for env, reqf in itertools.product(envs, requirements_files): - - for package, error in utils_thirdparty.fetch_wheels( - environment=env, - requirements_file=reqf, - allow_unpinned=allow_unpinned, - dest_dir=thirdparty_dir, - remote_links_url=remote_links_url, - ): - if error: - print('Failed to fetch wheel:', package, ':', error) - - # optionally fetch sources - if with_sources or only_sources: - - for reqf in requirements_files: - for package, error in utils_thirdparty.fetch_sources( - requirements_file=reqf, - allow_unpinned=allow_unpinned, - dest_dir=thirdparty_dir, - remote_links_url=remote_links_url, - ): - if error: - print('Failed to fetch source:', package, ':', error) - - if with_about: - 
utils_thirdparty.add_fetch_or_update_about_and_license_files(dest_dir=thirdparty_dir) - utils_thirdparty.find_problems( - dest_dir=thirdparty_dir, - report_missing_sources=with_sources or only_sources, - report_missing_wheels=not only_sources, - ) - - -if __name__ == '__main__': - fetch_requirements() diff --git a/etc/scripts/fetch_thirdparty.py b/etc/scripts/fetch_thirdparty.py new file mode 100644 index 0000000..26d520f --- /dev/null +++ b/etc/scripts/fetch_thirdparty.py @@ -0,0 +1,275 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# ScanCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/skeleton for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import itertools +import os +import sys + +import click + +import utils_thirdparty +import utils_requirements + +TRACE = False +TRACE_DEEP = False + + +@click.command() +@click.option( + "-r", + "--requirements", + "requirements_files", + type=click.Path(exists=True, readable=True, path_type=str, dir_okay=False), + metavar="REQUIREMENT-FILE", + multiple=True, + required=False, + help="Path to pip requirements file(s) listing thirdparty packages.", +) +@click.option( + "--spec", + "--specifier", + "specifiers", + type=str, + metavar="SPECIFIER", + multiple=True, + required=False, + help="Thirdparty package name==version specification(s) as in django==1.2.3. 
" + "With --latest-version a plain package name is also acceptable.", +) +@click.option( + "-l", + "--latest-version", + is_flag=True, + help="Get the latest version of all packages, ignoring any specified versions.", +) +@click.option( + "-d", + "--dest", + "dest_dir", + type=click.Path(exists=True, readable=True, path_type=str, file_okay=False), + metavar="DIR", + default=utils_thirdparty.THIRDPARTY_DIR, + show_default=True, + help="Path to the detsination directory where to save downloaded wheels, " + "sources, ABOUT and LICENSE files..", +) +@click.option( + "-w", + "--wheels", + is_flag=True, + help="Download wheels.", +) +@click.option( + "-s", + "--sdists", + is_flag=True, + help="Download source sdists tarballs.", +) +@click.option( + "-p", + "--python-version", + "python_versions", + type=click.Choice(utils_thirdparty.PYTHON_VERSIONS), + metavar="PYVER", + default=utils_thirdparty.PYTHON_VERSIONS, + show_default=True, + multiple=True, + help="Python version(s) to use for wheels.", +) +@click.option( + "-o", + "--operating-system", + "operating_systems", + type=click.Choice(utils_thirdparty.PLATFORMS_BY_OS), + metavar="OS", + default=tuple(utils_thirdparty.PLATFORMS_BY_OS), + multiple=True, + show_default=True, + help="OS(ses) to use for wheels: one of linux, mac or windows.", +) +@click.option( + "--index-url", + "index_urls", + type=str, + metavar="INDEX", + default=utils_thirdparty.PYPI_INDEX_URLS, + show_default=True, + multiple=True, + help="PyPI index URL(s) to use for wheels and sources, in order of preferences.", +) +@click.option( + "--use-cached-index", + is_flag=True, + help="Use on disk cached PyPI indexes list of packages and versions and do not refetch if present.", +) + +@click.help_option("-h", "--help") +def fetch_thirdparty( + requirements_files, + specifiers, + latest_version, + dest_dir, + python_versions, + operating_systems, + wheels, + sdists, + index_urls, + use_cached_index, +): + """ + Download to --dest THIRDPARTY_DIR the PyPI 
wheels, source distributions, + and their ABOUT metadata, license and notices files. + + Download the PyPI packages listed in the combination of: + - the pip requirements --requirements REQUIREMENT-FILE(s), + - the pip name==version --specifier SPECIFIER(s) + - any pre-existing wheels or sdsists found in --dest-dir THIRDPARTY_DIR. + + Download wheels with the --wheels option for the ``--python-version`` + PYVER(s) and ``--operating_system`` OS(s) combinations defaulting to all + supported combinations. + + Download sdists tarballs with the --sdists option. + + Generate or Download .ABOUT, .LICENSE and .NOTICE files for all the wheels + and sources fetched. + + Download from the provided PyPI simple --index-url INDEX(s) URLs. + """ + if not (wheels or sdists): + print("Error: one or both of --wheels and --sdists is required.") + sys.exit(1) + + print(f"COLLECTING REQUIRED NAMES & VERSIONS FROM {dest_dir}") + + existing_packages_by_nv = { + (package.name, package.version): package + for package in utils_thirdparty.get_local_packages(directory=dest_dir) + } + + required_name_versions = set(existing_packages_by_nv.keys()) + + for req_file in requirements_files: + nvs = utils_requirements.load_requirements( + requirements_file=req_file, + with_unpinned=latest_version, + ) + required_name_versions.update(nvs) + + for specifier in specifiers: + nv = utils_requirements.get_required_name_version( + requirement=specifier, + with_unpinned=latest_version, + ) + required_name_versions.add(nv) + + if latest_version: + names = set(name for name, _version in sorted(required_name_versions)) + required_name_versions = {(n, None) for n in names} + + if not required_name_versions: + print("Error: no requirements requested.") + sys.exit(1) + + if TRACE_DEEP: + print("required_name_versions:") + for n, v in required_name_versions: + print(f" {n} @ {v}") + + # create the environments matrix we need for wheels + environments = None + if wheels: + evts = itertools.product(python_versions, 
operating_systems) + environments = [utils_thirdparty.Environment.from_pyver_and_os(pyv, os) for pyv, os in evts] + + # Collect PyPI repos + repos = [] + for index_url in index_urls: + index_url = index_url.strip("/") + existing = utils_thirdparty.DEFAULT_PYPI_REPOS_BY_URL.get(index_url) + if existing: + existing.use_cached_index = use_cached_index + repos.append(existing) + else: + repo = utils_thirdparty.PypiSimpleRepository( + index_url=index_url, + use_cached_index=use_cached_index, + ) + repos.append(repo) + + wheels_fetched = [] + wheels_not_found = [] + + sdists_fetched = [] + sdists_not_found = [] + + for name, version in sorted(required_name_versions): + nv = name, version + print(f"Processing: {name} @ {version}") + if wheels: + for environment in environments: + if TRACE: + print(f" ==> Fetching wheel for envt: {environment}") + fwfns = utils_thirdparty.download_wheel( + name=name, + version=version, + environment=environment, + dest_dir=dest_dir, + repos=repos, + ) + if fwfns: + wheels_fetched.extend(fwfns) + else: + wheels_not_found.append(f"{name}=={version} for: {environment}") + if TRACE: + print(f" NOT FOUND") + + if sdists: + if TRACE: + print(f" ==> Fetching sdist: {name}=={version}") + fetched = utils_thirdparty.download_sdist( + name=name, + version=version, + dest_dir=dest_dir, + repos=repos, + ) + if fetched: + sdists_fetched.append(fetched) + else: + sdists_not_found.append(f"{name}=={version}") + if TRACE: + print(f" NOT FOUND") + + if wheels and wheels_not_found: + print(f"==> MISSING WHEELS") + for wh in wheels_not_found: + print(f" {wh}") + + if sdists and sdists_not_found: + print(f"==> MISSING SDISTS") + for sd in sdists_not_found: + print(f" {sd}") + + print(f"==> FETCHING OR CREATING ABOUT AND LICENSE FILES") + utils_thirdparty.fetch_abouts_and_licenses(dest_dir=dest_dir, use_cached_index=use_cached_index) + utils_thirdparty.clean_about_files(dest_dir=dest_dir) + + # check for problems + print(f"==> CHECK FOR PROBLEMS") + 
utils_thirdparty.find_problems( + dest_dir=dest_dir, + report_missing_sources=sdists, + report_missing_wheels=wheels, + ) + + +if __name__ == "__main__": + fetch_thirdparty() diff --git a/etc/scripts/fix_thirdparty.py b/etc/scripts/fix_thirdparty.py deleted file mode 100644 index 061d3fa..0000000 --- a/etc/scripts/fix_thirdparty.py +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# ScanCode is a trademark of nexB Inc. -# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/skeleton for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. -# -import click - -import utils_thirdparty - - -@click.command() - -@click.option('-d', '--thirdparty-dir', - type=click.Path(exists=True, readable=True, path_type=str, file_okay=False), - required=True, - help='Path to the thirdparty directory to fix.', -) -@click.option('--build-wheels', - is_flag=True, - help='Build all missing wheels .', -) -@click.option('--build-remotely', - is_flag=True, - help='Build missing wheels remotely.', -) -@click.help_option('-h', '--help') -def fix_thirdparty_dir( - thirdparty_dir, - build_wheels, - build_remotely, -): - """ - Fix a thirdparty directory of dependent package wheels and sdist. - - Multiple fixes are applied: - - fetch or build missing binary wheels - - fetch missing source distributions - - derive, fetch or add missing ABOUT files - - fetch missing .LICENSE and .NOTICE files - - remove outdated package versions and the ABOUT, .LICENSE and .NOTICE files - - Optionally build missing binary wheels for all supported OS and Python - version combos locally or remotely. 
- """ - print('***FETCH*** MISSING WHEELS') - package_envts_not_fetched = utils_thirdparty.fetch_missing_wheels(dest_dir=thirdparty_dir) - print('***FETCH*** MISSING SOURCES') - src_name_ver_not_fetched = utils_thirdparty.fetch_missing_sources(dest_dir=thirdparty_dir) - - package_envts_not_built = [] - if build_wheels: - print('***BUILD*** MISSING WHEELS') - package_envts_not_built, _wheel_filenames_built = utils_thirdparty.build_missing_wheels( - packages_and_envts=package_envts_not_fetched, - build_remotely=build_remotely, - dest_dir=thirdparty_dir, - ) - - print('***ADD*** ABOUT AND LICENSES') - utils_thirdparty.add_fetch_or_update_about_and_license_files(dest_dir=thirdparty_dir) - - # report issues - for name, version in src_name_ver_not_fetched: - print(f'{name}=={version}: Failed to fetch source distribution.') - - for package, envt in package_envts_not_built: - print( - f'{package.name}=={package.version}: Failed to build wheel ' - f'on {envt.operating_system} for Python {envt.python_version}') - - print('***FIND PROBLEMS***') - utils_thirdparty.find_problems(dest_dir=thirdparty_dir) - - -if __name__ == '__main__': - fix_thirdparty_dir() diff --git a/etc/scripts/gen_pypi_simple.py b/etc/scripts/gen_pypi_simple.py index 887e407..03312ab 100644 --- a/etc/scripts/gen_pypi_simple.py +++ b/etc/scripts/gen_pypi_simple.py @@ -5,81 +5,46 @@ # Copyright (c) 2010 David Wolever . All rights reserved. 
# originally from https://github.com/wolever/pip2pi +import hashlib import os import re import shutil - +from collections import defaultdict from html import escape from pathlib import Path +from typing import NamedTuple """ -name: pip compatibility tags -version: 20.3.1 -download_url: https://github.com/pypa/pip/blob/20.3.1/src/pip/_internal/models/wheel.py -copyright: Copyright (c) 2008-2020 The pip developers (see AUTHORS.txt file) -license_expression: mit -notes: the weel name regex is copied from pip-20.3.1 pip/_internal/models/wheel.py - -Copyright (c) 2008-2020 The pip developers (see AUTHORS.txt file) - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +Generate a PyPI simple index froma directory. """ -get_wheel_from_filename = re.compile( - r"""^(?P(?P.+?)-(?P.*?)) - ((-(?P\d[^-]*?))?-(?P.+?)-(?P.+?)-(?P.+?) 
- \.whl)$""", - re.VERBOSE -).match - -sdist_exts = ".tar.gz", ".tar.bz2", ".zip", ".tar.xz", -wheel_ext = ".whl" -app_ext = ".pyz" -dist_exts = sdist_exts + (wheel_ext, app_ext) class InvalidDistributionFilename(Exception): pass -def get_package_name_from_filename(filename, normalize=True): +def get_package_name_from_filename(filename): """ - Return the package name extracted from a package ``filename``. - Optionally ``normalize`` the name according to distribution name rules. + Return the normalized package name extracted from a package ``filename``. + Normalization is done according to distribution name rules. Raise an ``InvalidDistributionFilename`` if the ``filename`` is invalid:: >>> get_package_name_from_filename("foo-1.2.3_rc1.tar.gz") 'foo' - >>> get_package_name_from_filename("foo-bar-1.2-py27-none-any.whl") + >>> get_package_name_from_filename("foo_bar-1.2-py27-none-any.whl") 'foo-bar' >>> get_package_name_from_filename("Cython-0.17.2-cp26-none-linux_x86_64.whl") 'cython' >>> get_package_name_from_filename("python_ldap-2.4.19-cp27-none-macosx_10_10_x86_64.whl") 'python-ldap' - >>> get_package_name_from_filename("foo.whl") - Traceback (most recent call last): - ... - InvalidDistributionFilename: ... - >>> get_package_name_from_filename("foo.png") - Traceback (most recent call last): - ... - InvalidFilePackageName: ... + >>> try: + ... get_package_name_from_filename("foo.whl") + ... except InvalidDistributionFilename: + ... pass + >>> try: + ... get_package_name_from_filename("foo.png") + ... except InvalidDistributionFilename: + ... 
pass """ if not filename or not filename.endswith(dist_exts): raise InvalidDistributionFilename(filename) @@ -98,7 +63,7 @@ def get_package_name_from_filename(filename, normalize=True): if not extension or not name_ver: raise InvalidDistributionFilename(filename) - name, _, version = name_ver.rpartition('-') + name, _, version = name_ver.rpartition("-") if not (name and version): raise InvalidDistributionFilename(filename) @@ -110,8 +75,8 @@ def get_package_name_from_filename(filename, normalize=True): if not wheel_info: raise InvalidDistributionFilename(filename) - name = wheel_info.group('name') - version = wheel_info.group('version') + name = wheel_info.group("name") + version = wheel_info.group("version") if not (name and version): raise InvalidDistributionFilename(filename) @@ -120,25 +85,106 @@ def get_package_name_from_filename(filename, normalize=True): name_ver, extension, _ = filename.rpartition(".pyz") if "-" in filename: - name, _, version = name_ver.rpartition('-') + name, _, version = name_ver.rpartition("-") else: name = name_ver if not name: raise InvalidDistributionFilename(filename) - if normalize: - name = name.lower().replace('_', '-') + name = normalize_name(name) return name -def build_pypi_index(directory, write_index=False): +def normalize_name(name): + """ + Return a normalized package name per PEP503, and copied from + https://www.python.org/dev/peps/pep-0503/#id4 """ - Using a ``directory`` directory of wheels and sdists, create the a PyPI simple - directory index at ``directory``/simple/ populated with the proper PyPI simple - index directory structure crafted using symlinks. 
+ return name and re.sub(r"[-_.]+", "-", name).lower() or name + + +def build_per_package_index(pkg_name, packages, base_url): + """ + Return an HTML document as string representing the index for a package + """ + document = [] + header = f""" + + + + Links for {pkg_name} + + """ + document.append(header) + + for package in packages: + document.append(package.simple_index_entry(base_url)) + + footer = """ + +""" + document.append(footer) + return "\n".join(document) + + +def build_links_package_index(packages_by_package_name, base_url): + """ + Return an HTML document as string which is a links index of all packages + """ + document = [] + header = f""" + + + Links for all packages + + """ + document.append(header) + + for _name, packages in packages_by_package_name.items(): + for package in packages: + document.append(package.simple_index_entry(base_url)) + + footer = """ + +""" + document.append(footer) + return "\n".join(document) + + +class Package(NamedTuple): + name: str + index_dir: Path + archive_file: Path + checksum: str + + @classmethod + def from_file(cls, name, index_dir, archive_file): + with open(archive_file, "rb") as f: + checksum = hashlib.sha256(f.read()).hexdigest() + return cls( + name=name, + index_dir=index_dir, + archive_file=archive_file, + checksum=checksum, + ) + + def simple_index_entry(self, base_url): + return ( + f' ' + f"{self.archive_file.name}
" + ) + + +def build_pypi_index(directory, base_url="https://thirdparty.aboutcode.org/pypi"): + """ + Using a ``directory`` directory of wheels and sdists, create the a PyPI + simple directory index at ``directory``/simple/ populated with the proper + PyPI simple index directory structure crafted using symlinks. WARNING: The ``directory``/simple/ directory is removed if it exists. + NOTE: in addition to the a PyPI simple index.html there is also a links.html + index file generated which is suitable to use with pip's --find-links """ directory = Path(directory) @@ -148,14 +194,15 @@ def build_pypi_index(directory, write_index=False): shutil.rmtree(str(index_dir), ignore_errors=True) index_dir.mkdir(parents=True) + packages_by_package_name = defaultdict(list) - if write_index: - simple_html_index = [ - "PyPI Simple Index", - "", - ] + # generate the main simple index.html + simple_html_index = [ + "", + "PyPI Simple Index", + '' '', + ] - package_names = set() for pkg_file in directory.iterdir(): pkg_filename = pkg_file.name @@ -167,25 +214,102 @@ def build_pypi_index(directory, write_index=False): ): continue - pkg_name = get_package_name_from_filename(pkg_filename) + pkg_name = get_package_name_from_filename( + filename=pkg_filename, + ) pkg_index_dir = index_dir / pkg_name pkg_index_dir.mkdir(parents=True, exist_ok=True) pkg_indexed_file = pkg_index_dir / pkg_filename + link_target = Path("../..") / pkg_filename pkg_indexed_file.symlink_to(link_target) - if write_index and pkg_name not in package_names: + if pkg_name not in packages_by_package_name: esc_name = escape(pkg_name) simple_html_index.append(f'{esc_name}
') - package_names.add(pkg_name) - if write_index: - simple_html_index.append("") - index_html = index_dir / "index.html" - index_html.write_text("\n".join(simple_html_index)) + packages_by_package_name[pkg_name].append( + Package.from_file( + name=pkg_name, + index_dir=pkg_index_dir, + archive_file=pkg_file, + ) + ) + + # finalize main index + simple_html_index.append("") + index_html = index_dir / "index.html" + index_html.write_text("\n".join(simple_html_index)) + + # also generate the simple index.html of each package, listing all its versions. + for pkg_name, packages in packages_by_package_name.items(): + per_package_index = build_per_package_index( + pkg_name=pkg_name, + packages=packages, + base_url=base_url, + ) + pkg_index_dir = packages[0].index_dir + ppi_html = pkg_index_dir / "index.html" + ppi_html.write_text(per_package_index) + + # also generate the a links.html page with all packages. + package_links = build_links_package_index( + packages_by_package_name=packages_by_package_name, + base_url=base_url, + ) + links_html = index_dir / "links.html" + links_html.write_text(package_links) + + +""" +name: pip-wheel +version: 20.3.1 +download_url: https://github.com/pypa/pip/blob/20.3.1/src/pip/_internal/models/wheel.py +copyright: Copyright (c) 2008-2020 The pip developers (see AUTHORS.txt file) +license_expression: mit +notes: the wheel name regex is copied from pip-20.3.1 pip/_internal/models/wheel.py + +Copyright (c) 2008-2020 The pip developers (see AUTHORS.txt file) +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission 
notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +""" +get_wheel_from_filename = re.compile( + r"""^(?P(?P.+?)-(?P.*?)) + ((-(?P\d[^-]*?))?-(?P.+?)-(?P.+?)-(?P.+?) + \.whl)$""", + re.VERBOSE, +).match + +sdist_exts = ( + ".tar.gz", + ".tar.bz2", + ".zip", + ".tar.xz", +) + +wheel_ext = ".whl" +app_ext = ".pyz" +dist_exts = sdist_exts + (wheel_ext, app_ext) if __name__ == "__main__": import sys + pkg_dir = sys.argv[1] build_pypi_index(pkg_dir) diff --git a/etc/scripts/gen_requirements.py b/etc/scripts/gen_requirements.py index 3be974c..07e26f7 100644 --- a/etc/scripts/gen_requirements.py +++ b/etc/scripts/gen_requirements.py @@ -8,36 +8,50 @@ # See https://github.com/nexB/skeleton for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # -import click +import argparse +import pathlib + import utils_requirements +""" +Utilities to manage requirements files. +NOTE: this should use ONLY the standard library and not import anything else +because this is used for boostrapping with no requirements installed. 
+""" -@click.command() - -@click.option('-s', '--site-packages-dir', - type=click.Path(exists=True, readable=True, path_type=str, file_okay=False, resolve_path=True), - required=True, - metavar='DIR', - help='Path to the "site-packages" directory where wheels are installed such as lib/python3.6/site-packages', -) -@click.option('-r', '--requirements-file', - type=click.Path(path_type=str, dir_okay=False), - metavar='FILE', - default='requirements.txt', - show_default=True, - help='Path to the requirements file to update or create.', -) -@click.help_option('-h', '--help') -def gen_requirements(site_packages_dir, requirements_file): - """ + +def gen_requirements(): + description = """ Create or replace the `--requirements-file` file FILE requirements file with all locally installed Python packages.all Python packages found installed in `--site-packages-dir` """ + parser = argparse.ArgumentParser(description=description) + + parser.add_argument( + "-s", + "--site-packages-dir", + dest="site_packages_dir", + type=pathlib.Path, + required=True, + metavar="DIR", + help="Path to the 'site-packages' directory where wheels are installed such as lib/python3.6/site-packages", + ) + parser.add_argument( + "-r", + "--requirements-file", + type=pathlib.Path, + metavar="FILE", + default="requirements.txt", + help="Path to the requirements file to update or create.", + ) + + args = parser.parse_args() + utils_requirements.lock_requirements( - requirements_file=requirements_file, - site_packages_dir=site_packages_dir, + site_packages_dir=args.site_packages_dir, + requirements_file=args.requirements_file, ) -if __name__ == '__main__': +if __name__ == "__main__": gen_requirements() diff --git a/etc/scripts/gen_requirements_dev.py b/etc/scripts/gen_requirements_dev.py index ff4ce50..12cc06d 100644 --- a/etc/scripts/gen_requirements_dev.py +++ b/etc/scripts/gen_requirements_dev.py @@ -8,48 +8,61 @@ # See https://github.com/nexB/skeleton for support or download. 
# See https://aboutcode.org for more information about nexB OSS projects. # -import click +import argparse +import pathlib + import utils_requirements +""" +Utilities to manage requirements files. +NOTE: this should use ONLY the standard library and not import anything else +because this is used for boostrapping with no requirements installed. +""" -@click.command() - -@click.option('-s', '--site-packages-dir', - type=click.Path(exists=True, readable=True, path_type=str, file_okay=False, resolve_path=True), - required=True, - metavar='DIR', - help='Path to the "site-packages" directory where wheels are installed such as lib/python3.6/site-packages', -) -@click.option('-d', '--dev-requirements-file', - type=click.Path(path_type=str, dir_okay=False), - metavar='FILE', - default='requirements-dev.txt', - show_default=True, - help='Path to the dev requirements file to update or create.', -) -@click.option('-r', '--main-requirements-file', - type=click.Path(path_type=str, dir_okay=False), - default='requirements.txt', - metavar='FILE', - show_default=True, - help='Path to the main requirements file. Its requirements will be excluded ' - 'from the generated dev requirements.', -) -@click.help_option('-h', '--help') -def gen_dev_requirements(site_packages_dir, dev_requirements_file, main_requirements_file): - """ + +def gen_dev_requirements(): + description = """ Create or overwrite the `--dev-requirements-file` pip requirements FILE with all Python packages found installed in `--site-packages-dir`. Exclude package names also listed in the --main-requirements-file pip requirements FILE (that are assume to the production requirements and therefore to always be present in addition to the development requirements). 
""" + parser = argparse.ArgumentParser(description=description) + + parser.add_argument( + "-s", + "--site-packages-dir", + type=pathlib.Path, + required=True, + metavar="DIR", + help='Path to the "site-packages" directory where wheels are installed such as lib/python3.6/site-packages', + ) + parser.add_argument( + "-d", + "--dev-requirements-file", + type=pathlib.Path, + metavar="FILE", + default="requirements-dev.txt", + help="Path to the dev requirements file to update or create.", + ) + parser.add_argument( + "-r", + "--main-requirements-file", + type=pathlib.Path, + default="requirements.txt", + metavar="FILE", + help="Path to the main requirements file. Its requirements will be excluded " + "from the generated dev requirements.", + ) + args = parser.parse_args() + utils_requirements.lock_dev_requirements( - dev_requirements_file=dev_requirements_file, - main_requirements_file=main_requirements_file, - site_packages_dir=site_packages_dir + dev_requirements_file=args.dev_requirements_file, + main_requirements_file=args.main_requirements_file, + site_packages_dir=args.site_packages_dir, ) -if __name__ == '__main__': +if __name__ == "__main__": gen_dev_requirements() diff --git a/etc/scripts/publish_files.py b/etc/scripts/publish_files.py deleted file mode 100644 index f343cb3..0000000 --- a/etc/scripts/publish_files.py +++ /dev/null @@ -1,204 +0,0 @@ -#!/usr/bin/env python -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# ScanCode is a trademark of nexB Inc. -# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/scancode-toolkit for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. 
-# -import hashlib -import os -import sys - -from pathlib import Path - -import click -import requests -import utils_thirdparty - -from github_release_retry import github_release_retry as grr - -""" -Create GitHub releases and upload files there. -""" - - -def get_files(location): - """ - Return an iterable of (filename, Path, md5) tuples for files in the `location` - directory tree recursively. - """ - for top, _dirs, files in os.walk(location): - for filename in files: - pth = Path(os.path.join(top, filename)) - with open(pth, 'rb') as fi: - md5 = hashlib.md5(fi.read()).hexdigest() - yield filename, pth, md5 - - -def get_etag_md5(url): - """ - Return the cleaned etag of URL `url` or None. - """ - headers = utils_thirdparty.get_remote_headers(url) - headers = {k.lower(): v for k, v in headers.items()} - etag = headers .get('etag') - if etag: - etag = etag.strip('"').lower() - return etag - - -def create_or_update_release_and_upload_directory( - user, - repo, - tag_name, - token, - directory, - retry_limit=10, - description=None, -): - """ - Create or update a GitHub release at https://github.com// for - `tag_name` tag using the optional `description` for this release. - Use the provided `token` as a GitHub token for API calls authentication. - Upload all files found in the `directory` tree to that GitHub release. - Retry API calls up to `retry_limit` time to work around instability the - GitHub API. - - Remote files that are not the same as the local files are deleted and re- - uploaded. 
- """ - release_homepage_url = f'https://github.com/{user}/{repo}/releases/{tag_name}' - - # scrape release page HTML for links - urls_by_filename = {os.path.basename(l): l - for l in utils_thirdparty.get_paths_or_urls(links_url=release_homepage_url) - } - - # compute what is new, modified or unchanged - print(f'Compute which files is new, modified or unchanged in {release_homepage_url}') - - new_to_upload = [] - unchanged_to_skip = [] - modified_to_delete_and_reupload = [] - for filename, pth, md5 in get_files(directory): - url = urls_by_filename.get(filename) - if not url: - print(f'{filename} content is NEW, will upload') - new_to_upload.append(pth) - continue - - out_of_date = get_etag_md5(url) != md5 - if out_of_date: - print(f'{url} content is CHANGED based on md5 etag, will re-upload') - modified_to_delete_and_reupload.append(pth) - else: - # print(f'{url} content is IDENTICAL, skipping upload based on Etag') - unchanged_to_skip.append(pth) - print('.') - - ghapi = grr.GithubApi( - github_api_url='https://api.github.com', - user=user, - repo=repo, - token=token, - retry_limit=retry_limit, - ) - - # yank modified - print( - f'Unpublishing {len(modified_to_delete_and_reupload)} published but ' - f'locally modified files in {release_homepage_url}') - - release = ghapi.get_release_by_tag(tag_name) - - for pth in modified_to_delete_and_reupload: - filename = os.path.basename(pth) - asset_id = ghapi.find_asset_id_by_file_name(filename, release) - print (f' Unpublishing file: {filename}).') - response = ghapi.delete_asset(asset_id) - if response.status_code != requests.codes.no_content: # NOQA - raise Exception(f'failed asset deletion: {response}') - - # finally upload new and modified - to_upload = new_to_upload + modified_to_delete_and_reupload - print(f'Publishing with {len(to_upload)} files to {release_homepage_url}') - release = grr.Release(tag_name=tag_name, body=description) - grr.make_release(ghapi, release, to_upload) - - -TOKEN_HELP = ( - 'The Github 
personal acess token is used to authenticate API calls. ' - 'Required unless you set the GITHUB_TOKEN environment variable as an alternative. ' - 'See for details: https://github.com/settings/tokens and ' - 'https://docs.github.com/en/github/authenticating-to-github/creating-a-personal-access-token' -) - - -@click.command() - -@click.option( - '--user-repo-tag', - help='The GitHub qualified repository user/name/tag in which ' - 'to create the release such as in nexB/thirdparty/pypi', - type=str, - required=True, -) -@click.option( - '-d', '--directory', - help='The directory that contains files to upload to the release.', - type=click.Path(exists=True, readable=True, path_type=str, file_okay=False, resolve_path=True), - required=True, -) -@click.option( - '--token', - help=TOKEN_HELP, - default=os.environ.get('GITHUB_TOKEN', None), - type=str, - required=False, -) -@click.option( - '--description', - help='Text description for the release. Ignored if the release exists.', - default=None, - type=str, - required=False, -) -@click.option( - '--retry_limit', - help='Number of retries when making failing GitHub API calls. ' - 'Retrying helps work around transient failures of the GitHub API.', - type=int, - default=10, -) -@click.help_option('-h', '--help') -def publish_files( - user_repo_tag, - directory, - retry_limit=10, token=None, description=None, -): - """ - Publish all the files in DIRECTORY as assets to a GitHub release. 
- Either create or update/replace remote files' - """ - if not token: - click.secho('--token required option is missing.') - click.secho(TOKEN_HELP) - sys.exit(1) - - user, repo, tag_name = user_repo_tag.split('/') - - create_or_update_release_and_upload_directory( - user=user, - repo=repo, - tag_name=tag_name, - description=description, - retry_limit=retry_limit, - token=token, - directory=directory, - ) - - -if __name__ == '__main__': - publish_files() diff --git a/etc/scripts/requirements.txt b/etc/scripts/requirements.txt index 6591e49..ebb404b 100644 --- a/etc/scripts/requirements.txt +++ b/etc/scripts/requirements.txt @@ -1,12 +1,11 @@ aboutcode_toolkit -github-release-retry2 attrs commoncode click requests saneyaml -romp pip setuptools twine -wheel \ No newline at end of file +wheel +build \ No newline at end of file diff --git a/etc/scripts/test_utils_pip_compatibility_tags.py b/etc/scripts/test_utils_pip_compatibility_tags.py index 30c4dda..98187c5 100644 --- a/etc/scripts/test_utils_pip_compatibility_tags.py +++ b/etc/scripts/test_utils_pip_compatibility_tags.py @@ -33,23 +33,25 @@ import utils_pip_compatibility_tags -@pytest.mark.parametrize('version_info, expected', [ - ((2,), '2'), - ((2, 8), '28'), - ((3,), '3'), - ((3, 6), '36'), - # Test a tuple of length 3. - ((3, 6, 5), '36'), - # Test a 2-digit minor version. - ((3, 10), '310'), -]) +@pytest.mark.parametrize( + "version_info, expected", + [ + ((2,), "2"), + ((2, 8), "28"), + ((3,), "3"), + ((3, 6), "36"), + # Test a tuple of length 3. + ((3, 6, 5), "36"), + # Test a 2-digit minor version. + ((3, 10), "310"), + ], +) def test_version_info_to_nodot(version_info, expected): - actual = pip_compatibility_tags.version_info_to_nodot(version_info) + actual = utils_pip_compatibility_tags.version_info_to_nodot(version_info) assert actual == expected class Testcompatibility_tags(object): - def mock_get_config_var(self, **kwd): """ Patch sysconfig.get_config_var for arbitrary keys. 
@@ -69,60 +71,60 @@ def test_no_hyphen_tag(self): """ import pip._internal.utils.compatibility_tags - mock_gcf = self.mock_get_config_var(SOABI='cpython-35m-darwin') + mock_gcf = self.mock_get_config_var(SOABI="cpython-35m-darwin") - with patch('sysconfig.get_config_var', mock_gcf): + with patch("sysconfig.get_config_var", mock_gcf): supported = pip._internal.utils.compatibility_tags.get_supported() for tag in supported: - assert '-' not in tag.interpreter - assert '-' not in tag.abi - assert '-' not in tag.platform + assert "-" not in tag.interpreter + assert "-" not in tag.abi + assert "-" not in tag.platform class TestManylinux2010Tags(object): - - @pytest.mark.parametrize("manylinux2010,manylinux1", [ - ("manylinux2010_x86_64", "manylinux1_x86_64"), - ("manylinux2010_i686", "manylinux1_i686"), - ]) + @pytest.mark.parametrize( + "manylinux2010,manylinux1", + [ + ("manylinux2010_x86_64", "manylinux1_x86_64"), + ("manylinux2010_i686", "manylinux1_i686"), + ], + ) def test_manylinux2010_implies_manylinux1(self, manylinux2010, manylinux1): """ Specifying manylinux2010 implies manylinux1. 
""" groups = {} - supported = pip_compatibility_tags.get_supported(platforms=[manylinux2010]) + supported = utils_pip_compatibility_tags.get_supported(platforms=[manylinux2010]) for tag in supported: - groups.setdefault( - (tag.interpreter, tag.abi), [] - ).append(tag.platform) + groups.setdefault((tag.interpreter, tag.abi), []).append(tag.platform) for arches in groups.values(): - if arches == ['any']: + if arches == ["any"]: continue assert arches[:2] == [manylinux2010, manylinux1] class TestManylinux2014Tags(object): - - @pytest.mark.parametrize("manylinuxA,manylinuxB", [ - ("manylinux2014_x86_64", ["manylinux2010_x86_64", "manylinux1_x86_64"]), - ("manylinux2014_i686", ["manylinux2010_i686", "manylinux1_i686"]), - ]) + @pytest.mark.parametrize( + "manylinuxA,manylinuxB", + [ + ("manylinux2014_x86_64", ["manylinux2010_x86_64", "manylinux1_x86_64"]), + ("manylinux2014_i686", ["manylinux2010_i686", "manylinux1_i686"]), + ], + ) def test_manylinuxA_implies_manylinuxB(self, manylinuxA, manylinuxB): """ Specifying manylinux2014 implies manylinux2010/manylinux1. """ groups = {} - supported = pip_compatibility_tags.get_supported(platforms=[manylinuxA]) + supported = utils_pip_compatibility_tags.get_supported(platforms=[manylinuxA]) for tag in supported: - groups.setdefault( - (tag.interpreter, tag.abi), [] - ).append(tag.platform) + groups.setdefault((tag.interpreter, tag.abi), []).append(tag.platform) expected_arches = [manylinuxA] expected_arches.extend(manylinuxB) for arches in groups.values(): - if arches == ['any']: + if arches == ["any"]: continue assert arches[:3] == expected_arches diff --git a/etc/scripts/test_utils_pypi_supported_tags.py b/etc/scripts/test_utils_pypi_supported_tags.py index 9ad68b2..d291572 100644 --- a/etc/scripts/test_utils_pypi_supported_tags.py +++ b/etc/scripts/test_utils_pypi_supported_tags.py @@ -29,6 +29,7 @@ def validate_wheel_filename_for_pypi(filename): an empty list if all tags are supported. 
""" from utils_thirdparty import Wheel + wheel = Wheel.from_filename(filename) return validate_platforms_for_pypi(wheel.platforms) diff --git a/etc/scripts/utils_dejacode.py b/etc/scripts/utils_dejacode.py index 8b6e5d2..f28e247 100644 --- a/etc/scripts/utils_dejacode.py +++ b/etc/scripts/utils_dejacode.py @@ -21,19 +21,19 @@ Utility to create and retrieve package and ABOUT file data from DejaCode. """ -DEJACODE_API_KEY = os.environ.get('DEJACODE_API_KEY', '') -DEJACODE_API_URL = os.environ.get('DEJACODE_API_URL', '') +DEJACODE_API_KEY = os.environ.get("DEJACODE_API_KEY", "") +DEJACODE_API_URL = os.environ.get("DEJACODE_API_URL", "") -DEJACODE_API_URL_PACKAGES = f'{DEJACODE_API_URL}packages/' +DEJACODE_API_URL_PACKAGES = f"{DEJACODE_API_URL}packages/" DEJACODE_API_HEADERS = { - 'Authorization': 'Token {}'.format(DEJACODE_API_KEY), - 'Accept': 'application/json; indent=4', + "Authorization": "Token {}".format(DEJACODE_API_KEY), + "Accept": "application/json; indent=4", } def can_do_api_calls(): if not DEJACODE_API_KEY and DEJACODE_API_URL: - print('DejaCode DEJACODE_API_KEY and DEJACODE_API_URL not configured. Doing nothing') + print("DejaCode DEJACODE_API_KEY and DEJACODE_API_URL not configured. 
Doing nothing") return False else: return True @@ -53,7 +53,7 @@ def fetch_dejacode_packages(params): headers=DEJACODE_API_HEADERS, ) - return response.json()['results'] + return response.json()["results"] def get_package_data(distribution): @@ -68,9 +68,9 @@ def get_package_data(distribution): return results[0] elif len_results > 1: - print(f'More than 1 entry exists, review at: {DEJACODE_API_URL_PACKAGES}') + print(f"More than 1 entry exists, review at: {DEJACODE_API_URL_PACKAGES}") else: - print('Could not find package:', distribution.download_url) + print("Could not find package:", distribution.download_url) def update_with_dejacode_data(distribution): @@ -82,7 +82,7 @@ def update_with_dejacode_data(distribution): if package_data: return distribution.update(package_data, keep_extra=False) - print(f'No package found for: {distribution}') + print(f"No package found for: {distribution}") def update_with_dejacode_about_data(distribution): @@ -92,19 +92,19 @@ def update_with_dejacode_about_data(distribution): """ package_data = get_package_data(distribution) if package_data: - package_api_url = package_data['api_url'] - about_url = f'{package_api_url}about' + package_api_url = package_data["api_url"] + about_url = f"{package_api_url}about" response = requests.get(about_url, headers=DEJACODE_API_HEADERS) # note that this is YAML-formatted - about_text = response.json()['about_data'] + about_text = response.json()["about_data"] about_data = saneyaml.load(about_text) return distribution.update(about_data, keep_extra=True) - print(f'No package found for: {distribution}') + print(f"No package found for: {distribution}") -def fetch_and_save_about_files(distribution, dest_dir='thirdparty'): +def fetch_and_save_about_files(distribution, dest_dir="thirdparty"): """ Fetch and save in `dest_dir` the .ABOUT, .LICENSE and .NOTICE files fetched from DejaCode for a Distribution `distribution`. 
Return True if files were @@ -112,8 +112,8 @@ def fetch_and_save_about_files(distribution, dest_dir='thirdparty'): """ package_data = get_package_data(distribution) if package_data: - package_api_url = package_data['api_url'] - about_url = f'{package_api_url}about_files' + package_api_url = package_data["api_url"] + about_url = f"{package_api_url}about_files" response = requests.get(about_url, headers=DEJACODE_API_HEADERS) about_zip = response.content with io.BytesIO(about_zip) as zf: @@ -121,7 +121,7 @@ def fetch_and_save_about_files(distribution, dest_dir='thirdparty'): zi.extractall(path=dest_dir) return True - print(f'No package found for: {distribution}') + print(f"No package found for: {distribution}") def find_latest_dejacode_package(distribution): @@ -138,9 +138,9 @@ def find_latest_dejacode_package(distribution): for package_data in packages: matched = ( - package_data['download_url'] == distribution.download_url - and package_data['version'] == distribution.version - and package_data['filename'] == distribution.filename + package_data["download_url"] == distribution.download_url + and package_data["version"] == distribution.version + and package_data["filename"] == distribution.filename ) if matched: @@ -149,12 +149,11 @@ def find_latest_dejacode_package(distribution): # there was no exact match, find the latest version # TODO: consider the closest version rather than the latest # or the version that has the best data - with_versions = [(packaging_version.parse(p['version']), p) for p in packages] + with_versions = [(packaging_version.parse(p["version"]), p) for p in packages] with_versions = sorted(with_versions) latest_version, latest_package_version = sorted(with_versions)[-1] print( - f'Found DejaCode latest version: {latest_version} ' - f'for dist: {distribution.package_url}', + f"Found DejaCode latest version: {latest_version} " f"for dist: {distribution.package_url}", ) return latest_package_version @@ -172,27 +171,26 @@ def 
create_dejacode_package(distribution): if existing_package_data: return existing_package_data - print(f'Creating new DejaCode package for: {distribution}') + print(f"Creating new DejaCode package for: {distribution}") new_package_payload = { # Trigger data collection, scan, and purl - 'collect_data': 1, + "collect_data": 1, } fields_to_carry_over = [ - 'download_url' - 'type', - 'namespace', - 'name', - 'version', - 'qualifiers', - 'subpath', - 'license_expression', - 'copyright', - 'description', - 'homepage_url', - 'primary_language', - 'notice_text', + "download_url" "type", + "namespace", + "name", + "version", + "qualifiers", + "subpath", + "license_expression", + "copyright", + "description", + "homepage_url", + "primary_language", + "notice_text", ] for field in fields_to_carry_over: @@ -207,7 +205,7 @@ def create_dejacode_package(distribution): ) new_package_data = response.json() if response.status_code != 201: - raise Exception(f'Error, cannot create package for: {distribution}') + raise Exception(f"Error, cannot create package for: {distribution}") print(f'New Package created at: {new_package_data["absolute_url"]}') return new_package_data diff --git a/etc/scripts/utils_pip_compatibility_tags.py b/etc/scripts/utils_pip_compatibility_tags.py index 4c6529b..5d5eb34 100644 --- a/etc/scripts/utils_pip_compatibility_tags.py +++ b/etc/scripts/utils_pip_compatibility_tags.py @@ -36,13 +36,13 @@ mac_platforms, ) -_osx_arch_pat = re.compile(r'(.+)_(\d+)_(\d+)_(.+)') +_osx_arch_pat = re.compile(r"(.+)_(\d+)_(\d+)_(.+)") def version_info_to_nodot(version_info): # type: (Tuple[int, ...]) -> str # Only use up to the first two numbers. - return ''.join(map(str, version_info[:2])) + return "".join(map(str, version_info[:2])) def _mac_platforms(arch): @@ -57,7 +57,7 @@ def _mac_platforms(arch): # actual prefix provided by the user in case they provided # something like "macosxcustom_". It may be good to remove # this as undocumented or deprecate it in the future. 
- '{}_{}'.format(name, arch[len('macosx_'):]) + "{}_{}".format(name, arch[len("macosx_") :]) for arch in mac_platforms(mac_version, actual_arch) ] else: @@ -69,31 +69,31 @@ def _mac_platforms(arch): def _custom_manylinux_platforms(arch): # type: (str) -> List[str] arches = [arch] - arch_prefix, arch_sep, arch_suffix = arch.partition('_') - if arch_prefix == 'manylinux2014': + arch_prefix, arch_sep, arch_suffix = arch.partition("_") + if arch_prefix == "manylinux2014": # manylinux1/manylinux2010 wheels run on most manylinux2014 systems # with the exception of wheels depending on ncurses. PEP 599 states # manylinux1/manylinux2010 wheels should be considered # manylinux2014 wheels: # https://www.python.org/dev/peps/pep-0599/#backwards-compatibility-with-manylinux2010-wheels - if arch_suffix in {'i686', 'x86_64'}: - arches.append('manylinux2010' + arch_sep + arch_suffix) - arches.append('manylinux1' + arch_sep + arch_suffix) - elif arch_prefix == 'manylinux2010': + if arch_suffix in {"i686", "x86_64"}: + arches.append("manylinux2010" + arch_sep + arch_suffix) + arches.append("manylinux1" + arch_sep + arch_suffix) + elif arch_prefix == "manylinux2010": # manylinux1 wheels run on most manylinux2010 systems with the # exception of wheels depending on ncurses. 
PEP 571 states # manylinux1 wheels should be considered manylinux2010 wheels: # https://www.python.org/dev/peps/pep-0571/#backwards-compatibility-with-manylinux1-wheels - arches.append('manylinux1' + arch_sep + arch_suffix) + arches.append("manylinux1" + arch_sep + arch_suffix) return arches def _get_custom_platforms(arch): # type: (str) -> List[str] - arch_prefix, _arch_sep, _arch_suffix = arch.partition('_') - if arch.startswith('macosx'): + arch_prefix, _arch_sep, _arch_suffix = arch.partition("_") + if arch.startswith("macosx"): arches = _mac_platforms(arch) - elif arch_prefix in ['manylinux2014', 'manylinux2010']: + elif arch_prefix in ["manylinux2014", "manylinux2010"]: arches = _custom_manylinux_platforms(arch) else: arches = [arch] @@ -139,7 +139,7 @@ def get_supported( version=None, # type: Optional[str] platforms=None, # type: Optional[List[str]] impl=None, # type: Optional[str] - abis=None # type: Optional[List[str]] + abis=None, # type: Optional[List[str]] ): # type: (...) -> List[Tag] """Return a list of supported tags for each version specified in diff --git a/etc/scripts/utils_pypi_supported_tags.py b/etc/scripts/utils_pypi_supported_tags.py index 8dcb70f..de9f21b 100644 --- a/etc/scripts/utils_pypi_supported_tags.py +++ b/etc/scripts/utils_pypi_supported_tags.py @@ -82,11 +82,7 @@ def is_supported_platform_tag(platform_tag): if platform_tag in _allowed_platforms: return True m = _macosx_platform_re.match(platform_tag) - if ( - m - and m.group("major") in _macosx_major_versions - and m.group("arch") in _macosx_arches - ): + if m and m.group("major") in _macosx_major_versions and m.group("arch") in _macosx_arches: return True m = _manylinux_platform_re.match(platform_tag) if m and m.group("arch") in _manylinux_arches: diff --git a/etc/scripts/utils_requirements.py b/etc/scripts/utils_requirements.py index ddbed61..7c99a33 100644 --- a/etc/scripts/utils_requirements.py +++ b/etc/scripts/utils_requirements.py @@ -8,72 +8,83 @@ # See 
https://github.com/nexB/skeleton for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # +import re import subprocess """ Utilities to manage requirements files and call pip. -NOTE: this should use ONLY the standard library and not import anything else. +NOTE: this should use ONLY the standard library and not import anything else +because this is used for boostrapping with no requirements installed. """ -def load_requirements(requirements_file='requirements.txt', force_pinned=True): +def load_requirements(requirements_file="requirements.txt", with_unpinned=False): """ Yield package (name, version) tuples for each requirement in a `requirement` - file. Every requirement versions must be pinned if `force_pinned` is True. - Otherwise un-pinned requirements are returned with a None version + file. Only accept requirements pinned to an exact version. """ with open(requirements_file) as reqs: req_lines = reqs.read().splitlines(False) - return get_required_name_versions(req_lines, force_pinned) + return get_required_name_versions(req_lines, with_unpinned=with_unpinned) -def get_required_name_versions(requirement_lines, force_pinned=True): +def get_required_name_versions(requirement_lines, with_unpinned=False): """ Yield required (name, version) tuples given a`requirement_lines` iterable of - requirement text lines. Every requirement versions must be pinned if - `force_pinned` is True. Otherwise un-pinned requirements are returned with a - None version + requirement text lines. Only accept requirements pinned to an exact version. 
""" + for req_line in requirement_lines: req_line = req_line.strip() - if not req_line or req_line.startswith('#'): + if not req_line or req_line.startswith("#"): continue - if '==' not in req_line and force_pinned: - raise Exception(f'Requirement version is not pinned: {req_line}') - name = req_line - version = None - else: - name, _, version = req_line.partition('==') - name = name.lower().strip() - version = version.lower().strip() - yield name, version - - -def parse_requires(requires): - """ - Return a list of requirement lines extracted from the `requires` text from - a setup.cfg *_requires section such as the "install_requires" section. + if req_line.startswith("-") or (not with_unpinned and not "==" in req_line): + print(f"Requirement line is not supported: ignored: {req_line}") + continue + yield get_required_name_version(requirement=req_line, with_unpinned=with_unpinned) + + +def get_required_name_version(requirement, with_unpinned=False): """ - requires = [c for c in requires.splitlines(False) if c] - if not requires: - return [] + Return a (name, version) tuple given a`requirement` specifier string. + Requirement version must be pinned. If ``with_unpinned`` is True, unpinned + requirements are accepted and only the name portion is returned. - requires = [''.join(r.split()) for r in requires if r and r.strip()] - return sorted(requires) + For example: + >>> assert get_required_name_version("foo==1.2.3") == ("foo", "1.2.3") + >>> assert get_required_name_version("fooA==1.2.3.DEV1") == ("fooa", "1.2.3.dev1") + >>> assert get_required_name_version("foo==1.2.3", with_unpinned=False) == ("foo", "1.2.3") + >>> assert get_required_name_version("foo", with_unpinned=True) == ("foo", "") + >>> assert get_required_name_version("foo>=1.2", with_unpinned=True) == ("foo", ""), get_required_name_version("foo>=1.2") + >>> try: + ... assert not get_required_name_version("foo", with_unpinned=False) + ... except Exception as e: + ... 
assert "Requirement version must be pinned" in str(e) + """ + requirement = requirement and "".join(requirement.lower().split()) + assert requirement, f"specifier is required is empty:{requirement!r}" + name, operator, version = split_req(requirement) + assert name, f"Name is required: {requirement}" + is_pinned = operator == "==" + if with_unpinned: + version = "" + else: + assert is_pinned and version, f"Requirement version must be pinned: {requirement}" + return name, version -def lock_requirements(requirements_file='requirements.txt', site_packages_dir=None): +def lock_requirements(requirements_file="requirements.txt", site_packages_dir=None): """ Freeze and lock current installed requirements and save this to the `requirements_file` requirements file. """ - with open(requirements_file, 'w') as fo: + with open(requirements_file, "w") as fo: fo.write(get_installed_reqs(site_packages_dir=site_packages_dir)) def lock_dev_requirements( - dev_requirements_file='requirements-dev.txt', - main_requirements_file='requirements.txt', + dev_requirements_file="requirements-dev.txt", + main_requirements_file="requirements.txt", site_packages_dir=None, ): """ @@ -89,15 +100,56 @@ def lock_dev_requirements( all_req_nvs = get_required_name_versions(all_req_lines) dev_only_req_nvs = {n: v for n, v in all_req_nvs if n not in main_names} - new_reqs = '\n'.join(f'{n}=={v}' for n, v in sorted(dev_only_req_nvs.items())) - with open(dev_requirements_file, 'w') as fo: + new_reqs = "\n".join(f"{n}=={v}" for n, v in sorted(dev_only_req_nvs.items())) + with open(dev_requirements_file, "w") as fo: fo.write(new_reqs) def get_installed_reqs(site_packages_dir): """ - Return the installed pip requirements as text found in `site_packages_dir` as a text. + Return the installed pip requirements as text found in `site_packages_dir` + as a text. 
+ """ + if not os.path.exists(site_packages_dir): + raise Exception(f"site_packages directory: {site_packages_dir!r} does not exists") + # Also include these packages in the output with --all: wheel, distribute, + # setuptools, pip + args = ["pip", "freeze", "--exclude-editable", "--all", "--path", site_packages_dir] + return subprocess.check_output(args, encoding="utf-8") + + +comparators = ( + "===", + "~=", + "!=", + "==", + "<=", + ">=", + ">", + "<", +) + +_comparators_re = r"|".join(comparators) +version_splitter = re.compile(rf"({_comparators_re})") + + +def split_req(req): + """ + Return a three-tuple of (name, comparator, version) given a ``req`` + requirement specifier string. Each segment may be empty. Spaces are removed. + + For example: + >>> assert split_req("foo==1.2.3") == ("foo", "==", "1.2.3"), split_req("foo==1.2.3") + >>> assert split_req("foo") == ("foo", "", ""), split_req("foo") + >>> assert split_req("==1.2.3") == ("", "==", "1.2.3"), split_req("==1.2.3") + >>> assert split_req("foo >= 1.2.3 ") == ("foo", ">=", "1.2.3"), split_req("foo >= 1.2.3 ") + >>> assert split_req("foo>=1.2") == ("foo", ">=", "1.2"), split_req("foo>=1.2") """ - # Also include these packages in the output with --all: wheel, distribute, setuptools, pip - args = ['pip', 'freeze', '--exclude-editable', '--all', '--path', site_packages_dir] - return subprocess.check_output(args, encoding='utf-8') + assert req + # do not allow multiple constraints and tags + assert not any(c in req for c in ",;") + req = "".join(req.split()) + if not any(c in req for c in comparators): + return req, "", "" + segments = version_splitter.split(req, maxsplit=1) + return tuple(segments) diff --git a/etc/scripts/utils_thirdparty.py b/etc/scripts/utils_thirdparty.py index 7613a0c..9cbda37 100644 --- a/etc/scripts/utils_thirdparty.py +++ b/etc/scripts/utils_thirdparty.py @@ -8,47 +8,45 @@ # See https://github.com/nexB/skeleton for support or download. 
# See https://aboutcode.org for more information about nexB OSS projects. # -from collections import defaultdict import email import itertools -import operator import os import re import shutil import subprocess -import tarfile import tempfile import time import urllib +from collections import defaultdict +from urllib.parse import quote_plus import attr import license_expression import packageurl import requests import saneyaml -import utils_pip_compatibility_tags -import utils_pypi_supported_tags - from commoncode import fileutils from commoncode.hash import multi_checksums from commoncode.text import python_safe_name from packaging import tags as packaging_tags from packaging import version as packaging_version -from utils_requirements import load_requirements + +import utils_pip_compatibility_tags """ Utilities to manage Python thirparty libraries source, binaries and metadata in local directories and remote repositories. -- update pip requirement files from installed packages for prod. and dev. -- build and save wheels for all required packages -- also build variants for wheels with native code for all each supported - operating systems (Linux, macOS, Windows) and Python versions (3.x) - combinations using remote Ci jobs -- collect source distributions for all required packages -- keep in sync wheels, distributions, ABOUT and LICENSE files to a PyPI-like - repository (using GitHub) -- create, update and fetch ABOUT, NOTICE and LICENSE metadata for all distributions +- download wheels for packages for all each supported operating systems + (Linux, macOS, Windows) and Python versions (3.x) combinations + +- download sources for packages (aka. 
sdist) + +- create, update and download ABOUT, NOTICE and LICENSE metadata for these + wheels and source distributions + +- update pip requirement files based on actually installed packages for + production and development Approach @@ -56,224 +54,262 @@ The processing is organized around these key objects: -- A PyPiPackage represents a PyPI package with its name and version. It tracks - the downloadable Distribution objects for that version: +- A PyPiPackage represents a PyPI package with its name and version and the + metadata used to populate an .ABOUT file and document origin and license. + It contains the downloadable Distribution objects for that version: - - one Sdist source Distribution object - - a list of Wheel binary Distribution objects + - one Sdist source Distribution + - a list of Wheel binary Distribution - A Distribution (either a Wheel or Sdist) is identified by and created from its - filename. It also has the metadata used to populate an .ABOUT file and - document origin and license. A Distribution can be fetched from Repository. - Metadata can be loaded from and dumped to ABOUT files and optionally from - DejaCode package data. + filename as well as its name and version. + A Distribution is fetched from a Repository. + Distribution metadata can be loaded from and dumped to ABOUT files. + +- A Wheel binary Distribution can have Python/Platform/OS tags it supports and + was built for and these tags can be matched to an Environment. + +- An Environment is a combination of a Python version and operating system + (e.g., platfiorm and ABI tags.) and is represented by the "tags" it supports. + +- A plain LinksRepository which is just a collection of URLs scrape from a web + page such as HTTP diretory listing. It is used either with pip "--find-links" + option or to fetch ABOUT and LICENSE files. + +- A PypiSimpleRepository is a PyPI "simple" index where a HTML page is listing + package name links. 
Each such link points to an HTML page listing URLs to all + wheels and sdsist of all versions of this package. + +PypiSimpleRepository and Packages are related through packages name, version and +filenames. + +The Wheel models code is partially derived from the mit-licensed pip and the +Distribution/Wheel/Sdist design has been heavily inspired by the packaging- +dists library https://github.com/uranusjr/packaging-dists by Tzu-ping Chung +""" -- An Environment is a combination of a Python version and operating system. - A Wheel Distribution also has Python/OS tags is supports and these can be - supported in a given Environment. +""" +Wheel downloader + +- parse requirement file +- create a TODO queue of requirements to process +- done: create an empty map of processed binary requirements as {package name: (list of versions/tags} -- Paths or URLs to "filenames" live in a Repository, either a plain - LinksRepository (an HTML page listing URLs or a local directory) or a - PypiRepository (a PyPI simple index where each package name has an HTML page - listing URLs to all distribution types and versions). - Repositories and Distributions are related through filenames. 
+ +- while we have package reqs in TODO queue, process one requirement: + - for each PyPI simple index: + - fetch through cache the PyPI simple index for this package + - for each environment: + - find a wheel matching pinned requirement in this index + - if file exist locally, continue + - fetch the wheel for env + - IF pure, break, no more needed for env + - collect requirement deps from wheel metadata and add to queue + - if fetched, break, otherwise display error message - The Wheel models code is partially derived from the mit-licensed pip and the - Distribution/Wheel/Sdist design has been heavily inspired by the packaging- - dists library https://github.com/uranusjr/packaging-dists by Tzu-ping Chung """ TRACE = False +TRACE_DEEP = False +TRACE_ULTRA_DEEP = False # Supported environments -PYTHON_VERSIONS = '36', '37', '38', '39', +PYTHON_VERSIONS = "36", "37", "38", "39", "310" + +PYTHON_DOT_VERSIONS_BY_VER = { + "36": "3.6", + "37": "3.7", + "38": "3.8", + "39": "3.9", + "310": "3.10", +} + + +def get_python_dot_version(version): + """ + Return a dot version from a plain, non-dot version. 
+ """ + return PYTHON_DOT_VERSIONS_BY_VER[version] + ABIS_BY_PYTHON_VERSION = { - '36':['cp36', 'cp36m'], - '37':['cp37', 'cp37m'], - '38':['cp38', 'cp38m'], - '39':['cp39', 'cp39m'], + "36": ["cp36", "cp36m", "abi3"], + "37": ["cp37", "cp37m", "abi3"], + "38": ["cp38", "cp38m", "abi3"], + "39": ["cp39", "cp39m", "abi3"], + "310": ["cp310", "cp310m", "abi3"], } PLATFORMS_BY_OS = { - 'linux': [ - 'linux_x86_64', - 'manylinux1_x86_64', - 'manylinux2014_x86_64', - 'manylinux2010_x86_64', + "linux": [ + "linux_x86_64", + "manylinux1_x86_64", + "manylinux2010_x86_64", + "manylinux2014_x86_64", ], - 'macos': [ - 'macosx_10_6_intel', 'macosx_10_6_x86_64', - 'macosx_10_9_intel', 'macosx_10_9_x86_64', - 'macosx_10_10_intel', 'macosx_10_10_x86_64', - 'macosx_10_11_intel', 'macosx_10_11_x86_64', - 'macosx_10_12_intel', 'macosx_10_12_x86_64', - 'macosx_10_13_intel', 'macosx_10_13_x86_64', - 'macosx_10_14_intel', 'macosx_10_14_x86_64', - 'macosx_10_15_intel', 'macosx_10_15_x86_64', + "macos": [ + "macosx_10_6_intel", + "macosx_10_6_x86_64", + "macosx_10_9_intel", + "macosx_10_9_x86_64", + "macosx_10_10_intel", + "macosx_10_10_x86_64", + "macosx_10_11_intel", + "macosx_10_11_x86_64", + "macosx_10_12_intel", + "macosx_10_12_x86_64", + "macosx_10_13_intel", + "macosx_10_13_x86_64", + "macosx_10_14_intel", + "macosx_10_14_x86_64", + "macosx_10_15_intel", + "macosx_10_15_x86_64", + "macosx_11_0_x86_64", + "macosx_11_intel", + "macosx_11_0_x86_64", + "macosx_11_intel", + "macosx_10_9_universal2", + "macosx_10_10_universal2", + "macosx_10_11_universal2", + "macosx_10_12_universal2", + "macosx_10_13_universal2", + "macosx_10_14_universal2", + "macosx_10_15_universal2", + "macosx_11_0_universal2", + # 'macosx_11_0_arm64', ], - 'windows': [ - 'win_amd64', + "windows": [ + "win_amd64", ], } -THIRDPARTY_DIR = 'thirdparty' -CACHE_THIRDPARTY_DIR = '.cache/thirdparty' +THIRDPARTY_DIR = "thirdparty" +CACHE_THIRDPARTY_DIR = ".cache/thirdparty" + 
+################################################################################ -REMOTE_LINKS_URL = 'https://thirdparty.aboutcode.org/pypi' +ABOUT_BASE_URL = "https://thirdparty.aboutcode.org/pypi" +ABOUT_PYPI_SIMPLE_URL = f"{ABOUT_BASE_URL}/simple" +ABOUT_LINKS_URL = f"{ABOUT_PYPI_SIMPLE_URL}/links.html" +PYPI_SIMPLE_URL = "https://pypi.org/simple" +PYPI_INDEX_URLS = (PYPI_SIMPLE_URL, ABOUT_PYPI_SIMPLE_URL) -EXTENSIONS_APP = '.pyz', -EXTENSIONS_SDIST = '.tar.gz', '.tar.bz2', '.zip', '.tar.xz', -EXTENSIONS_INSTALLABLE = EXTENSIONS_SDIST + ('.whl',) -EXTENSIONS_ABOUT = '.ABOUT', '.LICENSE', '.NOTICE', -EXTENSIONS = EXTENSIONS_INSTALLABLE + EXTENSIONS_ABOUT + EXTENSIONS_APP +################################################################################ -PYPI_SIMPLE_URL = 'https://pypi.org/simple' +EXTENSIONS_APP = (".pyz",) +EXTENSIONS_SDIST = ( + ".tar.gz", + ".zip", + ".tar.xz", +) +EXTENSIONS_INSTALLABLE = EXTENSIONS_SDIST + (".whl",) +EXTENSIONS_ABOUT = ( + ".ABOUT", + ".LICENSE", + ".NOTICE", +) +EXTENSIONS = EXTENSIONS_INSTALLABLE + EXTENSIONS_ABOUT + EXTENSIONS_APP -LICENSEDB_API_URL = 'https://scancode-licensedb.aboutcode.org' +LICENSEDB_API_URL = "https://scancode-licensedb.aboutcode.org" LICENSING = license_expression.Licensing() +collect_urls = re.compile('href="([^"]+)"').findall + ################################################################################ -# -# Fetch remote wheels and sources locally -# +# Fetch wheels and sources locally ################################################################################ -def fetch_wheels( - environment=None, - requirements_file='requirements.txt', - allow_unpinned=False, - dest_dir=THIRDPARTY_DIR, - remote_links_url=REMOTE_LINKS_URL, -): - """ - Download all of the wheel of packages listed in the ``requirements_file`` - requirements file into ``dest_dir`` directory. - - Only get wheels for the ``environment`` Enviromnent constraints. 
If the - provided ``environment`` is None then the current Python interpreter - environment is used implicitly. +class DistributionNotFound(Exception): + pass - Only accept pinned requirements (e.g. with a version) unless - ``allow_unpinned`` is True. - Use exclusively direct downloads from a remote repo at URL - ``remote_links_url``. If ``remote_links_url`` is a path, use this as a - directory of links instead of a URL. +def download_wheel(name, version, environment, dest_dir=THIRDPARTY_DIR, repos=tuple()): + """ + Download the wheels binary distribution(s) of package ``name`` and + ``version`` matching the ``environment`` Environment constraints into the + ``dest_dir`` directory. Return a list of fetched_wheel_filenames, possibly + empty. - Yield tuples of (PypiPackage, error) where is None on success. + Use the first PyPI simple repository from a list of ``repos`` that contains this wheel. """ - missed = [] + if TRACE_DEEP: + print(f" download_wheel: {name}=={version} for envt: {environment}") - if not allow_unpinned: - force_pinned = True - else: - force_pinned = False + if not repos: + repos = DEFAULT_PYPI_REPOS - try: - rrp = list(get_required_remote_packages( - requirements_file=requirements_file, - force_pinned=force_pinned, - remote_links_url=remote_links_url, - )) - except Exception as e: - raise Exception( - dict( - requirements_file=requirements_file, - force_pinned=force_pinned, - remote_links_url=remote_links_url, - ) - ) from e + fetched_wheel_filenames = [] - fetched_filenames = set() - for name, version, package in rrp: + for repo in repos: + package = repo.get_package_version(name=name, version=version) if not package: - missed.append((name, version,)) - nv = f'{name}=={version}' if version else name - yield None, f'fetch_wheels: Missing package in remote repo: {nv}' + if TRACE_DEEP: + print(f" download_wheel: No package in {repo.index_url} for {name}=={version}") + continue + supported_wheels = 
list(package.get_supported_wheels(environment=environment)) + if not supported_wheels: + if TRACE_DEEP: + print( + f" download_wheel: No supported wheel for {name}=={version}: {environment} " + ) + continue - else: - fetched_filename = package.fetch_wheel( - environment=environment, - fetched_filenames=fetched_filenames, - dest_dir=dest_dir, - ) + for wheel in supported_wheels: + if TRACE_DEEP: + print( + f" download_wheel: Getting wheel from index (or cache): {wheel.download_url}" + ) + fetched_wheel_filename = wheel.download(dest_dir=dest_dir) + fetched_wheel_filenames.append(fetched_wheel_filename) - if fetched_filename: - fetched_filenames.add(fetched_filename) - error = None - else: - if fetched_filename in fetched_filenames: - error = None - else: - error = f'Failed to fetch' - yield package, error - - if missed: - rr = get_remote_repo() - print() - print(f'===> fetch_wheels: Missed some packages') - for n, v in missed: - nv = f'{n}=={v}' if v else n - print(f'Missed package {nv} in remote repo, has only:') - for pv in rr.get_versions(n): - print(' ', pv) - raise Exception('Missed some packages in remote repo') - - -def fetch_sources( - requirements_file='requirements.txt', - allow_unpinned=False, - dest_dir=THIRDPARTY_DIR, - remote_links_url=REMOTE_LINKS_URL, -): - """ - Download all of the dependent package sources listed in the - ``requirements_file`` requirements file into ``dest_dir`` destination - directory. + if fetched_wheel_filenames: + # do not futher fetch from other repos if we find in first, typically PyPI + break - Use direct downloads to achieve this (not pip download). Use exclusively the - packages found from a remote repo at URL ``remote_links_url``. If - ``remote_links_url`` is a path, use this as a directory of links instead of - a URL. + return fetched_wheel_filenames - Only accept pinned requirements (e.g. with a version) unless - ``allow_unpinned`` is True. 
- Yield tuples of (PypiPackage, error message) for each package where error - message will empty on success. +def download_sdist(name, version, dest_dir=THIRDPARTY_DIR, repos=tuple()): """ - missed = [] + Download the sdist source distribution of package ``name`` and ``version`` + into the ``dest_dir`` directory. Return a fetched filename or None. - if not allow_unpinned: - force_pinned = True - else: - force_pinned = False + Use the first PyPI simple repository from a list of ``repos`` that contains + this sdist. + """ + if TRACE: + print(f" download_sdist: {name}=={version}") + + if not repos: + repos = DEFAULT_PYPI_REPOS - rrp = list(get_required_remote_packages( - requirements_file=requirements_file, - force_pinned=force_pinned, - remote_links_url=remote_links_url, - )) + fetched_sdist_filename = None + + for repo in repos: + package = repo.get_package_version(name=name, version=version) - for name, version, package in rrp: if not package: - missed.append((name, name,)) - nv = f'{name}=={version}' if version else name - yield None, f'fetch_sources: Missing package in remote repo: {nv}' + if TRACE_DEEP: + print(f" download_sdist: No package in {repo.index_url} for {name}=={version}") + continue + sdist = package.sdist + if not sdist: + if TRACE_DEEP: + print(f" download_sdist: No sdist for {name}=={version}") + continue - elif not package.sdist: - yield package, f'Missing sdist in links' + if TRACE_DEEP: + print(f" download_sdist: Getting sdist from index (or cache): {sdist.download_url}") + fetched_sdist_filename = package.sdist.download(dest_dir=dest_dir) - else: - fetched = package.fetch_sdist(dest_dir=dest_dir) - error = f'Failed to fetch' if not fetched else None - yield package, error - if missed: - raise Exception(f'Missing source packages in {remote_links_url}', missed) + if fetched_sdist_filename: + # do not futher fetch from other repos if we find in first, typically PyPI + break + + return fetched_sdist_filename 
################################################################################ # @@ -286,12 +322,12 @@ def fetch_sources( class NameVer: name = attr.ib( type=str, - metadata=dict(help='Python package name, lowercase and normalized.'), + metadata=dict(help="Python package name, lowercase and normalized."), ) version = attr.ib( type=str, - metadata=dict(help='Python package version string.'), + metadata=dict(help="Python package version string."), ) @property @@ -306,17 +342,6 @@ def normalize_name(name): """ return name and re.sub(r"[-_.]+", "-", name).lower() or name - @staticmethod - def standardize_name(name): - """ - Return a standardized package name, e.g. lowercased and using - not _ - """ - return name and re.sub(r"[-_]+", "-", name).lower() or name - - @property - def name_ver(self): - return f'{self.name}-{self.version}' - def sortable_name_version(self): """ Return a tuple of values to sort by name, then version. @@ -326,154 +351,154 @@ def sortable_name_version(self): @classmethod def sorted(cls, namevers): - return sorted(namevers, key=cls.sortable_name_version) + return sorted(namevers or [], key=cls.sortable_name_version) @attr.attributes class Distribution(NameVer): - # field names that can be updated from another dist of mapping + # field names that can be updated from another Distribution or mapping updatable_fields = [ - 'license_expression', - 'copyright', - 'description', - 'homepage_url', - 'primary_language', - 'notice_text', - 'extra_data', + "license_expression", + "copyright", + "description", + "homepage_url", + "primary_language", + "notice_text", + "extra_data", ] filename = attr.ib( repr=False, type=str, - default='', - metadata=dict(help='File name.'), + default="", + metadata=dict(help="File name."), ) path_or_url = attr.ib( repr=False, type=str, - default='', - metadata=dict(help='Path or download URL.'), + default="", + metadata=dict(help="Path or URL"), ) sha256 = attr.ib( repr=False, type=str, - default='', - 
metadata=dict(help='SHA256 checksum.'), + default="", + metadata=dict(help="SHA256 checksum."), ) sha1 = attr.ib( repr=False, type=str, - default='', - metadata=dict(help='SHA1 checksum.'), + default="", + metadata=dict(help="SHA1 checksum."), ) md5 = attr.ib( repr=False, type=int, default=0, - metadata=dict(help='MD5 checksum.'), + metadata=dict(help="MD5 checksum."), ) type = attr.ib( repr=False, type=str, - default='pypi', - metadata=dict(help='Package type'), + default="pypi", + metadata=dict(help="Package type"), ) namespace = attr.ib( repr=False, type=str, - default='', - metadata=dict(help='Package URL namespace'), + default="", + metadata=dict(help="Package URL namespace"), ) qualifiers = attr.ib( repr=False, type=dict, default=attr.Factory(dict), - metadata=dict(help='Package URL qualifiers'), + metadata=dict(help="Package URL qualifiers"), ) subpath = attr.ib( repr=False, type=str, - default='', - metadata=dict(help='Package URL subpath'), + default="", + metadata=dict(help="Package URL subpath"), ) size = attr.ib( repr=False, type=str, - default='', - metadata=dict(help='Size in bytes.'), + default="", + metadata=dict(help="Size in bytes."), ) primary_language = attr.ib( repr=False, type=str, - default='Python', - metadata=dict(help='Primary Programming language.'), + default="Python", + metadata=dict(help="Primary Programming language."), ) description = attr.ib( repr=False, type=str, - default='', - metadata=dict(help='Description.'), + default="", + metadata=dict(help="Description."), ) homepage_url = attr.ib( repr=False, type=str, - default='', - metadata=dict(help='Homepage URL'), + default="", + metadata=dict(help="Homepage URL"), ) notes = attr.ib( repr=False, type=str, - default='', - metadata=dict(help='Notes.'), + default="", + metadata=dict(help="Notes."), ) copyright = attr.ib( repr=False, type=str, - default='', - metadata=dict(help='Copyright.'), + default="", + metadata=dict(help="Copyright."), ) license_expression = attr.ib( repr=False, 
type=str, - default='', - metadata=dict(help='License expression'), + default="", + metadata=dict(help="License expression"), ) licenses = attr.ib( repr=False, type=list, default=attr.Factory(list), - metadata=dict(help='List of license mappings.'), + metadata=dict(help="List of license mappings."), ) notice_text = attr.ib( repr=False, type=str, - default='', - metadata=dict(help='Notice text'), + default="", + metadata=dict(help="Notice text"), ) extra_data = attr.ib( repr=False, type=dict, default=attr.Factory(dict), - metadata=dict(help='Extra data'), + metadata=dict(help="Extra data"), ) @property @@ -481,51 +506,110 @@ def package_url(self): """ Return a Package URL string of self. """ - return str(packageurl.PackageURL(**self.purl_identifiers())) + return str( + packageurl.PackageURL( + type=self.type, + namespace=self.namespace, + name=self.name, + version=self.version, + subpath=self.subpath, + qualifiers=self.qualifiers, + ) + ) @property def download_url(self): - if self.path_or_url and self.path_or_url.startswith('https://'): - return self.path_or_url - else: - return self.get_best_download_url() + return self.get_best_download_url() + + def get_best_download_url(self, repos=tuple()): + """ + Return the best download URL for this distribution where best means this + is the first URL found for this distribution found in the list of + ``repos``. + + If none is found, return a synthetic PyPI remote URL. 
+ """ + + if not repos: + repos = DEFAULT_PYPI_REPOS + + for repo in repos: + package = repo.get_package_version(name=self.name, version=self.version) + if not package: + if TRACE: + print( + f" get_best_download_url: {self.name}=={self.version} " + f"not found in {repo.index_url}" + ) + continue + pypi_url = package.get_url_for_filename(self.filename) + if pypi_url: + return pypi_url + else: + if TRACE: + print( + f" get_best_download_url: {self.filename} not found in {repo.index_url}" + ) + + def download(self, dest_dir=THIRDPARTY_DIR): + """ + Download this distribution into `dest_dir` directory. + Return the fetched filename. + """ + assert self.filename + if TRACE_DEEP: + print( + f"Fetching distribution of {self.name}=={self.version}:", + self.filename, + ) + + # FIXME: + fetch_and_save( + path_or_url=self.path_or_url, + dest_dir=dest_dir, + filename=self.filename, + as_text=False, + ) + return self.filename @property def about_filename(self): - return f'{self.filename}.ABOUT' - - def has_about_file(self, dest_dir=THIRDPARTY_DIR): - return os.path.exists(os.path.join(dest_dir, self.about_filename)) + return f"{self.filename}.ABOUT" @property def about_download_url(self): - return self.build_remote_download_url(self.about_filename) + return f"{ABOUT_BASE_URL}/{self.about_filename}" @property def notice_filename(self): - return f'{self.filename}.NOTICE' + return f"{self.filename}.NOTICE" @property def notice_download_url(self): - return self.build_remote_download_url(self.notice_filename) + return f"{ABOUT_BASE_URL}/{self.notice_filename}" @classmethod def from_path_or_url(cls, path_or_url): """ Return a distribution built from the data found in the filename of a - `path_or_url` string. Raise an exception if this is not a valid + ``path_or_url`` string. Raise an exception if this is not a valid filename. 
""" - filename = os.path.basename(path_or_url.strip('/')) + filename = os.path.basename(path_or_url.strip("/")) dist = cls.from_filename(filename) dist.path_or_url = path_or_url return dist @classmethod def get_dist_class(cls, filename): - if filename.endswith('.whl'): + if filename.endswith(".whl"): return Wheel - elif filename.endswith(('.zip', '.tar.gz',)): + elif filename.endswith( + ( + ".zip", + ".tar.gz", + ) + ): return Sdist raise InvalidDistributionFilename(filename) @@ -535,123 +619,15 @@ def from_filename(cls, filename): Return a distribution built from the data found in a `filename` string. Raise an exception if this is not a valid filename """ + filename = os.path.basename(filename.strip("/")) clazz = cls.get_dist_class(filename) return clazz.from_filename(filename) - @classmethod - def from_data(cls, data, keep_extra=False): - """ - Return a distribution built from a `data` mapping. - """ - filename = data['filename'] - dist = cls.from_filename(filename) - dist.update(data, keep_extra=keep_extra) - return dist - - @classmethod - def from_dist(cls, data, dist): - """ - Return a distribution built from a `data` mapping and update it with data - from another dist Distribution. 
Return None if it cannot be created - """ - # We can only create from a dist of the same package - has_same_key_fields = all(data.get(kf) == getattr(dist, kf, None) - for kf in ('type', 'namespace', 'name') - ) - if not has_same_key_fields: - print(f'Missing key fields: Cannot derive a new dist from data: {data} and dist: {dist}') - return - - has_key_field_values = all(data.get(kf) for kf in ('type', 'name', 'version')) - if not has_key_field_values: - print(f'Missing key field values: Cannot derive a new dist from data: {data} and dist: {dist}') - return - - data = dict(data) - # do not overwrite the data with the other dist - # only supplement - data.update({k: v for k, v in dist.get_updatable_data().items() if not data.get(k)}) - return cls.from_data(data) - - @classmethod - def build_remote_download_url(cls, filename, base_url=REMOTE_LINKS_URL): - """ - Return a direct download URL for a file in our remote repo - """ - return f'{base_url}/{filename}' - - def get_best_download_url(self): - """ - Return the best download URL for this distribution where best means that - PyPI is better and our own remote repo URLs are second. - If none is found, return a synthetic remote URL. - """ - name = self.normalized_name - version = self.version - filename = self.filename - - pypi_package = get_pypi_package(name=name, version=version) - if pypi_package: - pypi_url = pypi_package.get_url_for_filename(filename) - if pypi_url: - return pypi_url - - remote_package = get_remote_package(name=name, version=version) - if remote_package: - remote_url = remote_package.get_url_for_filename(filename) - if remote_url: - return remote_url - else: - # the package may not have been published yet, so we craft a URL - # using our remote base URL - return self.build_remote_download_url(self.filename) - - def purl_identifiers(self, skinny=False): - """ - Return a mapping of non-empty identifier name/values for the purl - fields. If skinny is True, only inlucde type, namespace and name. 
- """ - identifiers = dict( - type=self.type, - namespace=self.namespace, - name=self.name, - ) - - if not skinny: - identifiers.update( - version=self.version, - subpath=self.subpath, - qualifiers=self.qualifiers, - ) - - return {k: v for k, v in sorted(identifiers.items()) if v} - - def identifiers(self, purl_as_fields=True): - """ - Return a mapping of non-empty identifier name/values. - Return each purl fields separately if purl_as_fields is True. - Otherwise return a package_url string for the purl. - """ - if purl_as_fields: - identifiers = self.purl_identifiers() - else: - identifiers = dict(package_url=self.package_url) - - identifiers.update( - download_url=self.download_url, - filename=self.filename, - md5=self.md5, - sha1=self.sha1, - package_url=self.package_url, - ) - - return {k: v for k, v in sorted(identifiers.items()) if v} - def has_key_metadata(self): """ Return True if this distribution has key metadata required for basic attribution. """ - if self.license_expression == 'public-domain': + if self.license_expression == "public-domain": # copyright not needed return True return self.license_expression and self.copyright and self.path_or_url @@ -672,7 +648,7 @@ def to_about(self): name=self.name, namespace=self.namespace, notes=self.notes, - notice_file=self.notice_filename if self.notice_text else '', + notice_file=self.notice_filename if self.notice_text else "", package_url=self.package_url, primary_language=self.primary_language, qualifiers=self.qualifiers, @@ -690,7 +666,7 @@ def to_dict(self): """ Return a mapping data from this distribution. 
""" - return {k: v for k, v in attr.asdict(self).items() if v} + return {k: v for k, v in attr.asdict(self).items() if v} def save_about_and_notice_files(self, dest_dir=THIRDPARTY_DIR): """ @@ -705,14 +681,17 @@ def save_if_modified(location, content): if existing_content == content: return False - if TRACE: print(f'Saving ABOUT (and NOTICE) files for: {self}') - with open(location, 'w') as fo: + if TRACE: + print(f"Saving ABOUT (and NOTICE) files for: {self}") + with open(location, "w") as fo: fo.write(content) return True + as_about = self.to_about() + save_if_modified( location=os.path.join(dest_dir, self.about_filename), - content=saneyaml.dump(self.to_about()), + content=saneyaml.dump(as_about), ) notice_text = self.notice_text and self.notice_text.strip() @@ -745,26 +724,26 @@ def load_about_data(self, about_filename_or_data=None, dest_dir=THIRDPARTY_DIR): else: about_data = about_filename_or_data - md5 = about_data.pop('checksum_md5', None) + md5 = about_data.pop("checksum_md5", None) if md5: - about_data['md5'] = md5 - sha1 = about_data.pop('checksum_sha1', None) + about_data["md5"] = md5 + sha1 = about_data.pop("checksum_sha1", None) if sha1: - about_data['sha1'] = sha1 - sha256 = about_data.pop('checksum_sha256', None) + about_data["sha1"] = sha1 + sha256 = about_data.pop("checksum_sha256", None) if sha256: - about_data['sha256'] = sha256 + about_data["sha256"] = sha256 - about_data.pop('about_resource', None) - notice_text = about_data.pop('notice_text', None) - notice_file = about_data.pop('notice_file', None) + about_data.pop("about_resource", None) + notice_text = about_data.pop("notice_text", None) + notice_file = about_data.pop("notice_file", None) if notice_text: - about_data['notice_text'] = notice_text + about_data["notice_text"] = notice_text elif notice_file: notice_loc = os.path.join(dest_dir, notice_file) if os.path.exists(notice_loc): with open(notice_loc) as fi: - about_data['notice_text'] = fi.read() + about_data["notice_text"] = fi.read() 
return self.update(about_data, keep_extra=True) def load_remote_about_data(self): @@ -773,7 +752,10 @@ def load_remote_about_data(self): NOTICE file if any. Return True if the data was updated. """ try: - about_text = fetch_content_from_path_or_url_through_cache(self.about_download_url) + about_text = CACHE.get( + path_or_url=self.about_download_url, + as_text=True, + ) except RemoteNotFetchedException: return False @@ -781,14 +763,17 @@ def load_remote_about_data(self): return False about_data = saneyaml.load(about_text) - notice_file = about_data.pop('notice_file', None) + notice_file = about_data.pop("notice_file", None) if notice_file: try: - notice_text = fetch_content_from_path_or_url_through_cache(self.notice_download_url) + notice_text = CACHE.get( + path_or_url=self.notice_download_url, + as_text=True, + ) if notice_text: - about_data['notice_text'] = notice_text + about_data["notice_text"] = notice_text except RemoteNotFetchedException: - print(f'Failed to fetch NOTICE file: {self.notice_download_url}') + print(f"Failed to fetch NOTICE file: {self.notice_download_url}") return self.load_about_data(about_data) def get_checksums(self, dest_dir=THIRDPARTY_DIR): @@ -798,7 +783,7 @@ def get_checksums(self, dest_dir=THIRDPARTY_DIR): """ dist_loc = os.path.join(dest_dir, self.filename) if os.path.exists(dist_loc): - return multi_checksums(dist_loc, checksum_names=('md5', 'sha1', 'sha256')) + return multi_checksums(dist_loc, checksum_names=("md5", "sha1", "sha256")) else: return {} @@ -814,67 +799,65 @@ def validate_checksums(self, dest_dir=THIRDPARTY_DIR): checksums computed for this dist filename is `dest_dir`. """ real_checksums = self.get_checksums(dest_dir) - for csk in ('md5', 'sha1', 'sha256'): + for csk in ("md5", "sha1", "sha256"): csv = getattr(self, csk) rcv = real_checksums.get(csk) if csv and rcv and csv != rcv: return False return True - def get_pip_hash(self): - """ - Return a pip hash option string as used in requirements for this dist. 
- """ - assert self.sha256, f'Missinh SHA256 for dist {self}' - return f'--hash=sha256:{self.sha256}' - def get_license_keys(self): try: - keys = LICENSING.license_keys(self.license_expression, unique=True, simple=True) + keys = LICENSING.license_keys( + self.license_expression, + unique=True, + simple=True, + ) except license_expression.ExpressionParseError: - return ['unknown'] + return ["unknown"] return keys - def fetch_license_files(self, dest_dir=THIRDPARTY_DIR): + def fetch_license_files(self, dest_dir=THIRDPARTY_DIR, use_cached_index=False): """ - Fetch license files is missing in `dest_dir`. + Fetch license files if missing in `dest_dir`. Return True if license files were fetched. """ - paths_or_urls = get_remote_repo().links + urls = LinksRepository.from_url(use_cached_index=use_cached_index).links errors = [] - extra_lic_names = [l.get('file') for l in self.extra_data.get('licenses', {})] - extra_lic_names += [self.extra_data.get('license_file')] - extra_lic_names = [ln for ln in extra_lic_names if ln] - lic_names = [ f'{key}.LICENSE' for key in self.get_license_keys()] - for filename in lic_names + extra_lic_names: + extra_lic_names = [l.get("file") for l in self.extra_data.get("licenses", {})] + extra_lic_names += [self.extra_data.get("license_file")] + extra_lic_names = [ln for ln in extra_lic_names if ln] + lic_names = [f"{key}.LICENSE" for key in self.get_license_keys()] + for filename in lic_names + extra_lic_names: floc = os.path.join(dest_dir, filename) if os.path.exists(floc): continue try: # try remotely first - lic_url = get_link_for_filename( - filename=filename, paths_or_urls=paths_or_urls) + lic_url = get_license_link_for_filename(filename=filename, urls=urls) - fetch_and_save_path_or_url( - filename=filename, - dest_dir=dest_dir, + fetch_and_save( path_or_url=lic_url, + dest_dir=dest_dir, + filename=filename, as_text=True, ) - if TRACE: print(f'Fetched license from remote: {lic_url}') + if TRACE: + print(f"Fetched license from remote: 
{lic_url}") except: try: # try licensedb second - lic_url = f'{LICENSEDB_API_URL}/{filename}' - fetch_and_save_path_or_url( - filename=filename, - dest_dir=dest_dir, + lic_url = f"{LICENSEDB_API_URL}/{filename}" + fetch_and_save( path_or_url=lic_url, + dest_dir=dest_dir, + filename=filename, as_text=True, ) - if TRACE: print(f'Fetched license from licensedb: {lic_url}') + if TRACE: + print(f"Fetched license from licensedb: {lic_url}") except: msg = f'No text for license {filename} in expression "{self.license_expression}" from {self}' @@ -888,14 +871,27 @@ def extract_pkginfo(self, dest_dir=THIRDPARTY_DIR): Return the text of the first PKG-INFO or METADATA file found in the archive of this Distribution in `dest_dir`. Return None if not found. """ - fmt = 'zip' if self.filename.endswith('.whl') else None - dist = os.path.join(dest_dir, self.filename) - with tempfile.TemporaryDirectory(prefix='pypi-tmp-extract') as td: + + fn = self.filename + if fn.endswith(".whl"): + fmt = "zip" + elif fn.endswith(".tar.gz"): + fmt = "gztar" + else: + fmt = None + + dist = os.path.join(dest_dir, fn) + with tempfile.TemporaryDirectory(prefix=f"pypi-tmp-extract-{fn}") as td: shutil.unpack_archive(filename=dist, extract_dir=td, format=fmt) # NOTE: we only care about the first one found in the dist # which may not be 100% right for pi in fileutils.resource_iter(location=td, with_dirs=False): - if pi.endswith(('PKG-INFO', 'METADATA',)): + if pi.endswith( + ( + "PKG-INFO", + "METADATA", + ) + ): with open(pi) as fi: return fi.read() @@ -906,31 +902,33 @@ def load_pkginfo_data(self, dest_dir=THIRDPARTY_DIR): """ pkginfo_text = self.extract_pkginfo(dest_dir=dest_dir) if not pkginfo_text: - print(f'!!!!PKG-INFO not found in {self.filename}') + print(f"!!!!PKG-INFO/METADATA not found in {self.filename}") return raw_data = email.message_from_string(pkginfo_text) - classifiers = raw_data.get_all('Classifier') or [] + classifiers = raw_data.get_all("Classifier") or [] - declared_license = 
[raw_data['License']] + [c for c in classifiers if c.startswith('License')] + declared_license = [raw_data["License"]] + [ + c for c in classifiers if c.startswith("License") + ] license_expression = compute_normalized_license_expression(declared_license) - other_classifiers = [c for c in classifiers if not c.startswith('License')] + other_classifiers = [c for c in classifiers if not c.startswith("License")] - holder = raw_data['Author'] - holder_contact = raw_data['Author-email'] - copyright_statement = f'Copyright (c) {holder} <{holder_contact}>' + holder = raw_data["Author"] + holder_contact = raw_data["Author-email"] + copyright_statement = f"Copyright (c) {holder} <{holder_contact}>" pkginfo_data = dict( - name=raw_data['Name'], + name=raw_data["Name"], declared_license=declared_license, - version=raw_data['Version'], - description=raw_data['Summary'], - homepage_url=raw_data['Home-page'], + version=raw_data["Version"], + description=raw_data["Summary"], + homepage_url=raw_data["Home-page"], copyright=copyright_statement, license_expression=license_expression, holder=holder, holder_contact=holder_contact, - keywords=raw_data['Keywords'], + keywords=raw_data["Keywords"], classifiers=other_classifiers, ) @@ -944,10 +942,7 @@ def update_from_other_dist(self, dist): def get_updatable_data(self, data=None): data = data or self.to_dict() - return { - k: v for k, v in data.items() - if v and k in self.updatable_fields - } + return {k: v for k, v in data.items() if v and k in self.updatable_fields} def update(self, data, overwrite=False, keep_extra=True): """ @@ -956,20 +951,21 @@ def update(self, data, overwrite=False, keep_extra=True): Return True if any data was updated, False otherwise. Raise an exception if there are key data conflicts. 
""" - package_url = data.get('package_url') + package_url = data.get("package_url") if package_url: purl_from_data = packageurl.PackageURL.from_string(package_url) purl_from_self = packageurl.PackageURL.from_string(self.package_url) if purl_from_data != purl_from_self: print( - f'Invalid dist update attempt, no same same purl with dist: ' - f'{self} using data {data}.') + f"Invalid dist update attempt, no same same purl with dist: " + f"{self} using data {data}." + ) return - data.pop('about_resource', None) - dl = data.pop('download_url', None) + data.pop("about_resource", None) + dl = data.pop("download_url", None) if dl: - data['path_or_url'] = dl + data["path_or_url"] = dl updated = False extra = {} @@ -985,7 +981,7 @@ def update(self, data, overwrite=False, keep_extra=True): try: setattr(self, k, v) except Exception as e: - raise Exception(f'{self}, {k}, {v}') from e + raise Exception(f"{self}, {k}, {v}") from e updated = True elif keep_extra: @@ -998,18 +994,110 @@ def update(self, data, overwrite=False, keep_extra=True): return updated +def get_license_link_for_filename(filename, urls): + """ + Return a link for `filename` found in the `links` list of URLs or paths. Raise an + exception if no link is found or if there are more than one link for that + file name. + """ + path_or_url = [l for l in urls if l.endswith(f"/{filename}")] + if not path_or_url: + raise Exception(f"Missing link to file: {filename}") + if not len(path_or_url) == 1: + raise Exception(f"Multiple links to file: {filename}: \n" + "\n".join(path_or_url)) + return path_or_url[0] + + class InvalidDistributionFilename(Exception): pass +def get_sdist_name_ver_ext(filename): + """ + Return a (name, version, extension) if filename is a valid sdist name. Some legacy + binary builds have weird names. Return False otherwise. 
+ + In particular they do not use PEP440 compliant versions and/or mix tags, os + and arch names in tarball names and versions: + + >>> assert get_sdist_name_ver_ext("intbitset-1.3.tar.gz") + >>> assert not get_sdist_name_ver_ext("intbitset-1.3.linux-x86_64.tar.gz") + >>> assert get_sdist_name_ver_ext("intbitset-1.4a.tar.gz") + >>> assert get_sdist_name_ver_ext("intbitset-1.4a.zip") + >>> assert not get_sdist_name_ver_ext("intbitset-2.0.linux-x86_64.tar.gz") + >>> assert get_sdist_name_ver_ext("intbitset-2.0.tar.gz") + >>> assert not get_sdist_name_ver_ext("intbitset-2.1-1.src.rpm") + >>> assert not get_sdist_name_ver_ext("intbitset-2.1-1.x86_64.rpm") + >>> assert not get_sdist_name_ver_ext("intbitset-2.1.linux-x86_64.tar.gz") + >>> assert not get_sdist_name_ver_ext("cffi-1.2.0-1.tar.gz") + >>> assert not get_sdist_name_ver_ext("html5lib-1.0-reupload.tar.gz") + >>> assert not get_sdist_name_ver_ext("selenium-2.0-dev-9429.tar.gz") + >>> assert not get_sdist_name_ver_ext("testfixtures-1.8.0dev-r4464.tar.gz") + """ + name_ver = None + extension = None + + for ext in EXTENSIONS_SDIST: + if filename.endswith(ext): + name_ver, extension, _ = filename.rpartition(ext) + break + + if not extension or not name_ver: + return False + + name, _, version = name_ver.rpartition("-") + + if not name or not version: + return False + + # weird version + if any( + w in version + for w in ( + "x86_64", + "i386", + ) + ): + return False + + # all char versions + if version.isalpha(): + return False + + # non-pep 440 version + if "-" in version: + return False + + # single version + if version.isdigit() and len(version) == 1: + return False + + # r1 version + if len(version) == 2 and version[0]=="r" and version[1].isdigit(): + return False + + # dotless version (but calver is OK) + if "." not in version and len(version) < 3: + return False + + # version with dashes selenium-2.0-dev-9429.tar.gz + if name.endswith(("dev",)) and "." 
not in version: + return False + # version pre or post, old legacy + if version.startswith(("beta", "rc", "pre", "post", "final")): + return False + + return name, version, extension + + @attr.attributes class Sdist(Distribution): extension = attr.ib( repr=False, type=str, - default='', - metadata=dict(help='File extension, including leading dot.'), + default="", + metadata=dict(help="File extension, including leading dot."), ) @classmethod @@ -1018,24 +1106,14 @@ def from_filename(cls, filename): Return a Sdist object built from a filename. Raise an exception if this is not a valid sdist filename """ - name_ver = None - extension = None - - for ext in EXTENSIONS_SDIST: - if filename.endswith(ext): - name_ver, extension, _ = filename.rpartition(ext) - break - - if not extension or not name_ver: + name_ver_ext = get_sdist_name_ver_ext(filename) + if not name_ver_ext: raise InvalidDistributionFilename(filename) - name, _, version = name_ver.rpartition('-') - - if not name or not version: - raise InvalidDistributionFilename(filename) + name, version, extension = name_ver_ext return cls( - type='pypi', + type="pypi", name=name, version=version, extension=extension, @@ -1047,7 +1125,7 @@ def to_filename(self): Return an sdist filename reconstructed from its fields (that may not be the same as the original filename.) """ - return f'{self.name}-{self.version}.{self.extension}' + return f"{self.name}-{self.version}.{self.extension}" @attr.attributes @@ -1092,38 +1170,38 @@ class Wheel(Distribution): r"""^(?P(?P.+?)-(?P.*?)) ((-(?P\d[^-]*?))?-(?P.+?)-(?P.+?)-(?P.+?) 
\.whl)$""", - re.VERBOSE + re.VERBOSE, ).match build = attr.ib( type=str, - default='', - metadata=dict(help='Python wheel build.'), + default="", + metadata=dict(help="Python wheel build."), ) python_versions = attr.ib( type=list, default=attr.Factory(list), - metadata=dict(help='List of wheel Python version tags.'), + metadata=dict(help="List of wheel Python version tags."), ) abis = attr.ib( type=list, default=attr.Factory(list), - metadata=dict(help='List of wheel ABI tags.'), + metadata=dict(help="List of wheel ABI tags."), ) platforms = attr.ib( type=list, default=attr.Factory(list), - metadata=dict(help='List of wheel platform tags.'), + metadata=dict(help="List of wheel platform tags."), ) tags = attr.ib( repr=False, type=set, default=attr.Factory(set), - metadata=dict(help='Set of all tags for this wheel.'), + metadata=dict(help="Set of all tags for this wheel."), ) @classmethod @@ -1136,24 +1214,23 @@ def from_filename(cls, filename): if not wheel_info: raise InvalidDistributionFilename(filename) - name = wheel_info.group('name').replace('_', '-') + name = wheel_info.group("name").replace("_", "-") # we'll assume "_" means "-" due to wheel naming scheme # (https://github.com/pypa/pip/issues/1150) - version = wheel_info.group('ver').replace('_', '-') - build = wheel_info.group('build') - python_versions = wheel_info.group('pyvers').split('.') - abis = wheel_info.group('abis').split('.') - platforms = wheel_info.group('plats').split('.') + version = wheel_info.group("ver").replace("_", "-") + build = wheel_info.group("build") + python_versions = wheel_info.group("pyvers").split(".") + abis = wheel_info.group("abis").split(".") + platforms = wheel_info.group("plats").split(".") # All the tag combinations from this file tags = { - packaging_tags.Tag(x, y, z) for x in python_versions - for y in abis for z in platforms + packaging_tags.Tag(x, y, z) for x in python_versions for y in abis for z in platforms } return cls( filename=filename, - type='pypi', + 
type="pypi", name=name, version=version, build=build, @@ -1167,25 +1244,22 @@ def is_supported_by_tags(self, tags): """ Return True is this wheel is compatible with one of a list of PEP 425 tags. """ + if TRACE_DEEP: + print() + print("is_supported_by_tags: tags:", tags) + print("self.tags:", self.tags) return not self.tags.isdisjoint(tags) - def is_supported_by_environment(self, environment): - """ - Return True if this wheel is compatible with the Environment - `environment`. - """ - return not self.is_supported_by_tags(environment.tags) - def to_filename(self): """ Return a wheel filename reconstructed from its fields (that may not be the same as the original filename.) """ - build = f'-{self.build}' if self.build else '' - pyvers = '.'.join(self.python_versions) - abis = '.'.join(self.abis) - plats = '.'.join(self.platforms) - return f'{self.name}-{self.version}{build}-{pyvers}-{abis}-{plats}.whl' + build = f"-{self.build}" if self.build else "" + pyvers = ".".join(self.python_versions) + abis = ".".join(self.abis) + plats = ".".join(self.platforms) + return f"{self.name}-{self.version}{build}-{pyvers}-{abis}-{plats}.whl" def is_pure(self): """ @@ -1211,11 +1285,7 @@ def is_pure(self): >>> Wheel.from_filename('future-0.16.0-py3-cp36m-any.whl').is_pure() False """ - return ( - 'py3' in self.python_versions - and 'none' in self.abis - and 'any' in self.platforms - ) + return "py3" in self.python_versions and "none" in self.abis and "any" in self.platforms def is_pure_wheel(filename): @@ -1228,49 +1298,32 @@ def is_pure_wheel(filename): @attr.attributes class PypiPackage(NameVer): """ - A Python package with its "distributions", e.g. wheels and source - distribution , ABOUT files and licenses or notices. + A Python package contains one or more wheels and one source distribution + from a repository. 
""" + sdist = attr.ib( repr=False, - type=str, - default='', - metadata=dict(help='Sdist source distribution for this package.'), + type=Sdist, + default=None, + metadata=dict(help="Sdist source distribution for this package."), ) wheels = attr.ib( repr=False, type=list, default=attr.Factory(list), - metadata=dict(help='List of Wheel for this package'), + metadata=dict(help="List of Wheel for this package"), ) - @property - def specifier(self): - """ - A requirement specifier for this package - """ - if self.version: - return f'{self.name}=={self.version}' - else: - return self.name - - @property - def specifier_with_hashes(self): - """ - Return a requirement specifier for this package with --hash options for - all its distributions - """ - items = [self.specifier] - items += [d.get_pip_hashes() for d in self.get_distributions()] - return ' \\\n '.join(items) - - def get_supported_wheels(self, environment): + def get_supported_wheels(self, environment, verbose=TRACE_ULTRA_DEEP): """ Yield all the Wheel of this package supported and compatible with the Environment `environment`. 
""" envt_tags = environment.tags() + if verbose: + print("get_supported_wheels: envt_tags:", envt_tags) for wheel in self.wheels: if wheel.is_supported_by_tags(envt_tags): yield wheel @@ -1296,6 +1349,8 @@ def package_from_dists(cls, dists): >>> assert package.wheels == [w1, w2] """ dists = list(dists) + if TRACE_DEEP: + print(f"package_from_dists: {dists}") if not dists: return @@ -1306,13 +1361,21 @@ def package_from_dists(cls, dists): package = PypiPackage(name=normalized_name, version=version) for dist in dists: - if dist.normalized_name != normalized_name or dist.version != version: + if dist.normalized_name != normalized_name: if TRACE: print( - f' Skipping inconsistent dist name and version: {dist} ' - f'Expected instead package name: {normalized_name} and version: "{version}"' + f" Skipping inconsistent dist name: expected {normalized_name} got {dist}" ) continue + elif dist.version != version: + dv = packaging_version.parse(dist.version) + v = packaging_version.parse(version) + if dv != v: + if TRACE: + print( + f" Skipping inconsistent dist version: expected {version} got {dist}" + ) + continue if isinstance(dist, Sdist): package.sdist = dist @@ -1321,210 +1384,102 @@ def package_from_dists(cls, dists): package.wheels.append(dist) else: - raise Exception(f'Unknown distribution type: {dist}') + raise Exception(f"Unknown distribution type: {dist}") + + if TRACE_DEEP: + print(f"package_from_dists: {package}") return package @classmethod - def packages_from_one_path_or_url(cls, path_or_url): + def packages_from_dir(cls, directory): """ - Yield PypiPackages built from files found in at directory path or the - URL to an HTML page (that will be fetched). + Yield PypiPackages built from files found in at directory path. 
""" - extracted_paths_or_urls = get_paths_or_urls(path_or_url) - return cls.packages_from_many_paths_or_urls(extracted_paths_or_urls) + base = os.path.abspath(directory) + + paths = [os.path.join(base, f) for f in os.listdir(base) if f.endswith(EXTENSIONS)] + + if TRACE_ULTRA_DEEP: + print("packages_from_dir: paths:", paths) + return PypiPackage.packages_from_many_paths_or_urls(paths) @classmethod def packages_from_many_paths_or_urls(cls, paths_or_urls): """ Yield PypiPackages built from a list of paths or URLs. + These are sorted by name and then by version from oldest to newest. """ - dists = cls.get_dists(paths_or_urls) + dists = PypiPackage.dists_from_paths_or_urls(paths_or_urls) + if TRACE_ULTRA_DEEP: + print("packages_from_many_paths_or_urls: dists:", dists) + dists = NameVer.sorted(dists) for _projver, dists_of_package in itertools.groupby( - dists, key=NameVer.sortable_name_version, + dists, + key=NameVer.sortable_name_version, ): - yield PypiPackage.package_from_dists(dists_of_package) + package = PypiPackage.package_from_dists(dists_of_package) + if TRACE_ULTRA_DEEP: + print("packages_from_many_paths_or_urls", package) + yield package @classmethod - def get_versions_from_path_or_url(cls, name, path_or_url): + def dists_from_paths_or_urls(cls, paths_or_urls): """ - Return a subset list from a list of PypiPackages version at `path_or_url` - that match PypiPackage `name`. - """ - packages = cls.packages_from_one_path_or_url(path_or_url) - return cls.get_versions(name, packages) + Return a list of Distribution given a list of + ``paths_or_urls`` to wheels or source distributions. - @classmethod - def get_versions(cls, name, packages): - """ - Return a subset list of package versions from a list of `packages` that - match PypiPackage `name`. - The list is sorted by version from oldest to most recent. 
- """ - norm_name = NameVer.normalize_name(name) - versions = [p for p in packages if p.normalized_name == norm_name] - return cls.sorted(versions) - - @classmethod - def get_latest_version(cls, name, packages): - """ - Return the latest version of PypiPackage `name` from a list of `packages`. - """ - versions = cls.get_versions(name, packages) - if not versions: - return - return versions[-1] - - @classmethod - def get_outdated_versions(cls, name, packages): - """ - Return all versions except the latest version of PypiPackage `name` from a - list of `packages`. - """ - versions = cls.get_versions(name, packages) - return versions[:-1] - - @classmethod - def get_name_version(cls, name, version, packages): - """ - Return the PypiPackage with `name` and `version` from a list of `packages` - or None if it is not found. - If `version` is None, return the latest version found. - """ - if version is None: - return cls.get_latest_version(name, packages) - - nvs = [p for p in cls.get_versions(name, packages) if p.version == version] - - if not nvs: - return - - if len(nvs) == 1: - return nvs[0] - - raise Exception(f'More than one PypiPackage with {name}=={version}') - - def fetch_wheel( - self, - environment=None, - fetched_filenames=None, - dest_dir=THIRDPARTY_DIR, - ): - """ - Download a binary wheel of this package matching the ``environment`` - Enviromnent constraints into ``dest_dir`` directory. - - Return the wheel filename if it was fetched, None otherwise. - - If the provided ``environment`` is None then the current Python - interpreter environment is used implicitly. Do not refetch wheel if - their name is in a provided ``fetched_filenames`` set. 
- """ - fetched_wheel_filename = None - if fetched_filenames is not None: - fetched_filenames = fetched_filenames - else: - fetched_filenames = set() - - for wheel in self.get_supported_wheels(environment): - - if wheel.filename not in fetched_filenames: - fetch_and_save_path_or_url( - filename=wheel.filename, - path_or_url=wheel.path_or_url, - dest_dir=dest_dir, - as_text=False, - ) - fetched_filenames.add(wheel.filename) - fetched_wheel_filename = wheel.filename - - # TODO: what if there is more than one? - break - - return fetched_wheel_filename - - def fetch_sdist(self, dest_dir=THIRDPARTY_DIR): - """ - Download the source distribution into `dest_dir` directory. Return the - fetched filename if it was fetched, False otherwise. - """ - if self.sdist: - assert self.sdist.filename - if TRACE: print('Fetching source for package:', self.name, self.version) - fetch_and_save_path_or_url( - filename=self.sdist.filename, - dest_dir=dest_dir, - path_or_url=self.sdist.path_or_url, - as_text=False, - ) - if TRACE: print(' --> file:', self.sdist.filename) - return self.sdist.filename - else: - print(f'Missing sdist for: {self.name}=={self.version}') - return False - - def delete_files(self, dest_dir=THIRDPARTY_DIR): - """ - Delete all PypiPackage files from `dest_dir` including wheels, sdist and - their ABOUT files. Note that we do not delete licenses since they can be - shared by several packages: therefore this would be done elsewhere in a - function that is aware of all used licenses. - """ - for to_delete in self.wheels + [self.sdist]: - if not to_delete: - continue - tdfn = to_delete.filename - for deletable in [tdfn, f'{tdfn}.ABOUT', f'{tdfn}.NOTICE']: - target = os.path.join(dest_dir, deletable) - if os.path.exists(target): - print(f'Deleting outdated {target}') - fileutils.delete(target) - - @classmethod - def get_dists(cls, paths_or_urls): - """ - Return a list of Distribution given a list of - `paths_or_urls` to wheels or source distributions. 
- - Each Distribution receives two extra attributes: - - the path_or_url it was created from - - its filename + Each Distribution receives two extra attributes: + - the path_or_url it was created from + - its filename For example: >>> paths_or_urls =''' ... /home/foo/bitarray-0.8.1-cp36-cp36m-linux_x86_64.whl ... bitarray-0.8.1-cp36-cp36m-macosx_10_9_x86_64.macosx_10_10_x86_64.whl ... bitarray-0.8.1-cp36-cp36m-win_amd64.whl - ... httsp://example.com/bar/bitarray-0.8.1.tar.gz - ... bitarray-0.8.1.tar.gz.ABOUT bit.LICENSE'''.split() - >>> result = list(PypiPackage.get_dists(paths_or_urls)) + ... https://example.com/bar/bitarray-0.8.1.tar.gz + ... bitarray-0.8.1.tar.gz.ABOUT + ... bit.LICENSE'''.split() + >>> results = list(PypiPackage.dists_from_paths_or_urls(paths_or_urls)) >>> for r in results: - ... r.filename = '' - ... r.path_or_url = '' - >>> expected = [ - ... Wheel(name='bitarray', version='0.8.1', build='', - ... python_versions=['cp36'], abis=['cp36m'], - ... platforms=['linux_x86_64']), - ... Wheel(name='bitarray', version='0.8.1', build='', - ... python_versions=['cp36'], abis=['cp36m'], - ... platforms=['macosx_10_9_x86_64', 'macosx_10_10_x86_64']), - ... Wheel(name='bitarray', version='0.8.1', build='', - ... python_versions=['cp36'], abis=['cp36m'], - ... platforms=['win_amd64']), - ... Sdist(name='bitarray', version='0.8.1') - ... ] - >>> assert expected == result - """ + ... print(r.__class__.__name__, r.name, r.version) + ... if isinstance(r, Wheel): + ... 
print(" ", ", ".join(r.python_versions), ", ".join(r.platforms)) + Wheel bitarray 0.8.1 + cp36 linux_x86_64 + Wheel bitarray 0.8.1 + cp36 macosx_10_9_x86_64, macosx_10_10_x86_64 + Wheel bitarray 0.8.1 + cp36 win_amd64 + Sdist bitarray 0.8.1 + """ + dists = [] + if TRACE_ULTRA_DEEP: + print(" ###paths_or_urls:", paths_or_urls) installable = [f for f in paths_or_urls if f.endswith(EXTENSIONS_INSTALLABLE)] for path_or_url in installable: try: - yield Distribution.from_path_or_url(path_or_url) + dist = Distribution.from_path_or_url(path_or_url) + dists.append(dist) + if TRACE_DEEP: + print( + " ===> dists_from_paths_or_urls:", + dist, + "\n ", + "with URL:", + dist.download_url, + "\n ", + "from URL:", + path_or_url, + ) except InvalidDistributionFilename: - if TRACE: - print(f'Skipping invalid distribution from: {path_or_url}') + if TRACE_DEEP: + print(f" Skipping invalid distribution from: {path_or_url}") continue + return dists def get_distributions(self): """ @@ -1549,50 +1504,54 @@ class Environment: """ An Environment describes a target installation environment with its supported Python version, ABI, platform, implementation and related - attributes. We can use these to pass as `pip download` options and force - fetching only the subset of packages that match these Environment - constraints as opposed to the current running Python interpreter - constraints. + attributes. + + We can use these to pass as `pip download` options and force fetching only + the subset of packages that match these Environment constraints as opposed + to the current running Python interpreter constraints. 
""" python_version = attr.ib( type=str, - default='', - metadata=dict(help='Python version supported by this environment.'), + default="", + metadata=dict(help="Python version supported by this environment."), ) operating_system = attr.ib( type=str, - default='', - metadata=dict(help='operating system supported by this environment.'), + default="", + metadata=dict(help="operating system supported by this environment."), ) implementation = attr.ib( type=str, - default='cp', - metadata=dict(help='Python implementation supported by this environment.'), + default="cp", + metadata=dict(help="Python implementation supported by this environment."), + repr=False, ) abis = attr.ib( type=list, default=attr.Factory(list), - metadata=dict(help='List of ABI tags supported by this environment.'), + metadata=dict(help="List of ABI tags supported by this environment."), + repr=False, ) platforms = attr.ib( type=list, default=attr.Factory(list), - metadata=dict(help='List of platform tags supported by this environment.'), + metadata=dict(help="List of platform tags supported by this environment."), + repr=False, ) @classmethod def from_pyver_and_os(cls, python_version, operating_system): - if '.' in python_version: - python_version = ''.join(python_version.split('.')) + if "." in python_version: + python_version = "".join(python_version.split(".")) return cls( python_version=python_version, - implementation='cp', + implementation="cp", abis=ABIS_BY_PYTHON_VERSION[python_version], platforms=PLATFORMS_BY_OS[operating_system], operating_system=operating_system, @@ -1600,27 +1559,34 @@ def from_pyver_and_os(cls, python_version, operating_system): def get_pip_cli_options(self): """ - Return a list of pip command line options for this environment. + Return a list of pip download command line options for this environment. 
""" options = [ - '--python-version', self.python_version, - '--implementation', self.implementation, - '--abi', self.abi, + "--python-version", + self.python_version, + "--implementation", + self.implementation, ] + for abi in self.abis: + options.extend(["--abi", abi]) + for platform in self.platforms: - options.extend(['--platform', platform]) + options.extend(["--platform", platform]) + return options def tags(self): """ Return a set of all the PEP425 tags supported by this environment. """ - return set(utils_pip_compatibility_tags.get_supported( - version=self.python_version or None, - impl=self.implementation or None, - platforms=self.platforms or None, - abis=self.abis or None, - )) + return set( + utils_pip_compatibility_tags.get_supported( + version=self.python_version or None, + impl=self.implementation or None, + platforms=self.platforms or None, + abis=self.abis or None, + ) + ) ################################################################################ # @@ -1630,142 +1596,199 @@ def tags(self): @attr.attributes -class Repository: +class PypiSimpleRepository: """ - A PyPI or links Repository of Python packages: wheels, sdist, ABOUT, etc. + A PyPI repository of Python packages: wheels, sdist, etc. like the public + PyPI simple index. It is populated lazily based on requested packages names. 
""" - packages_by_normalized_name = attr.ib( - type=dict, - default=attr.Factory(lambda: defaultdict(list)), - metadata=dict(help= - 'Mapping of {package name: [package objects]} available in this repo'), + index_url = attr.ib( + type=str, + default=PYPI_SIMPLE_URL, + metadata=dict(help="Base PyPI simple URL for this index."), ) - packages_by_normalized_name_version = attr.ib( + # we keep a nested mapping of PypiPackage that has this shape: + # {name: {version: PypiPackage, version: PypiPackage, etc} + # the inner versions mapping is sorted by version from oldest to newest + + packages = attr.ib( type=dict, - default=attr.Factory(dict), - metadata=dict(help= - 'Mapping of {(name, version): package object} available in this repo'), + default=attr.Factory(lambda: defaultdict(dict)), + metadata=dict( + help="Mapping of {name: {version: PypiPackage, version: PypiPackage, etc} available in this repo" + ), ) - def get_links(self, *args, **kwargs): - raise NotImplementedError() + fetched_package_normalized_names = attr.ib( + type=set, + default=attr.Factory(set), + metadata=dict(help="A set of already fetched package normalized names."), + ) - def get_versions(self, name): + use_cached_index = attr.ib( + type=bool, + default=False, + metadata=dict(help="If True, use any existing on-disk cached PyPI index files. Otherwise, fetch and cache."), + ) + + def _get_package_versions_map(self, name): """ - Return a list of all available PypiPackage version for this package name. - The list may be empty. + Return a mapping of all available PypiPackage version for this package name. + The mapping may be empty. 
It is ordered by version from oldest to newest """ - raise NotImplementedError() + assert name + normalized_name = NameVer.normalize_name(name) + versions = self.packages[normalized_name] + if not versions and normalized_name not in self.fetched_package_normalized_names: + self.fetched_package_normalized_names.add(normalized_name) + try: + links = self.fetch_links(normalized_name=normalized_name) + # note that thsi is sorted so the mapping is also sorted + versions = { + package.version: package + for package in PypiPackage.packages_from_many_paths_or_urls(paths_or_urls=links) + } + self.packages[normalized_name] = versions + except RemoteNotFetchedException as e: + if TRACE: + print(f"failed to fetch package name: {name} from: {self.index_url}:\n{e}") + + if not versions and TRACE: + print(f"WARNING: package {name} not found in repo: {self.index_url}") + + return versions - def get_package(self, name, version): + def get_package_versions(self, name): + """ + Return a mapping of all available PypiPackage version as{version: + package} for this package name. The mapping may be empty but not None. + It is sorted by version from oldest to newest. + """ + return dict(self._get_package_versions_map(name)) + + def get_package_version(self, name, version=None): """ Return the PypiPackage with name and version or None. + Return the latest PypiPackage version if version is None. """ - raise NotImplementedError() + if not version: + versions = list(self._get_package_versions_map(name).values()) + return versions and versions[-1] + else: + return self._get_package_versions_map(name).get(version) - def get_latest_version(self, name): + def fetch_links(self, normalized_name): """ - Return the latest PypiPackage version for this package name or None. + Return a list of download link URLs found in a PyPI simple index for package + name using the `index_url` of this repository. 
""" - raise NotImplementedError() + package_url = f"{self.index_url}/{normalized_name}" + text = CACHE.get( + path_or_url=package_url, + as_text=True, + force=not self.use_cached_index, + ) + links = collect_urls(text) + # TODO: keep sha256 + links = [l.partition("#sha256=") for l in links] + links = [url for url, _, _sha256 in links] + return links + + +PYPI_PUBLIC_REPO = PypiSimpleRepository(index_url=PYPI_SIMPLE_URL) +PYPI_SELFHOSTED_REPO = PypiSimpleRepository(index_url=ABOUT_PYPI_SIMPLE_URL) +DEFAULT_PYPI_REPOS = PYPI_PUBLIC_REPO, PYPI_SELFHOSTED_REPO +DEFAULT_PYPI_REPOS_BY_URL = {r.index_url: r for r in DEFAULT_PYPI_REPOS} @attr.attributes -class LinksRepository(Repository): +class LinksRepository: """ - Represents a simple links repository which is either a local directory with - Python wheels and sdist or a remote URL to an HTML with links to these. - (e.g. suitable for use with pip --find-links). + Represents a simple links repository such an HTTP directory listing or an + HTML page with links. """ - path_or_url = attr.ib( + + url = attr.ib( type=str, - default='', - metadata=dict(help='Package directory path or URL'), + default="", + metadata=dict(help="Links directory URL"), ) links = attr.ib( type=list, default=attr.Factory(list), - metadata=dict(help='List of links available in this repo'), + metadata=dict(help="List of links available in this repo"), + ) + + use_cached_index = attr.ib( + type=bool, + default=False, + metadata=dict(help="If True, use any existing on-disk cached index files. 
Otherwise, fetch and cache."), ) def __attrs_post_init__(self): if not self.links: - self.links = get_paths_or_urls(links_url=self.path_or_url) - if not self.packages_by_normalized_name: - for p in PypiPackage.packages_from_many_paths_or_urls(paths_or_urls=self.links): - normalized_name = p.normalized_name - self.packages_by_normalized_name[normalized_name].append(p) - self.packages_by_normalized_name_version[(normalized_name, p.version)] = p - - def get_links(self, *args, **kwargs): - return self.links or [] + self.links = self.find_links() - def get_versions(self, name): - name = name and NameVer.normalize_name(name) - return self.packages_by_normalized_name.get(name, []) - - def get_latest_version(self, name): - return PypiPackage.get_latest_version(name, self.get_versions(name)) - - def get_package(self, name, version): - return PypiPackage.get_name_version(name, version, self.get_versions(name)) + def find_links(self, _CACHE=[]): + """ + Return a list of link URLs found in the HTML page at `self.url` + """ + if _CACHE: + return _CACHE + links_url = self.url + if TRACE_DEEP: + print(f"Finding links from: {links_url}") + plinks_url = urllib.parse.urlparse(links_url) + base_url = urllib.parse.SplitResult( + plinks_url.scheme, plinks_url.netloc, "", "", "" + ).geturl() -@attr.attributes -class PypiRepository(Repository): - """ - Represents the public PyPI simple index. 
- It is populated lazily based on requested packages names - """ - simple_url = attr.ib( - type=str, - default=PYPI_SIMPLE_URL, - metadata=dict(help='Base PyPI simple URL for this index.'), - ) + if TRACE_DEEP: + print(f"Base URL {base_url}") - links_by_normalized_name = attr.ib( - type=dict, - default=attr.Factory(lambda: defaultdict(list)), - metadata=dict(help='Mapping of {package name: [links]} available in this repo'), - ) + text = CACHE.get( + path_or_url=links_url, + as_text=True, + force=not self.use_cached_index, + ) - def _fetch_links(self, name): - name = name and NameVer.normalize_name(name) - return find_pypi_links(name=name, simple_url=self.simple_url) + links = [] + for link in collect_urls(text): + if not link.endswith(EXTENSIONS): + continue - def _populate_links_and_packages(self, name): - name = name and NameVer.normalize_name(name) - if name in self.links_by_normalized_name: - return + plink = urllib.parse.urlsplit(link) - links = self._fetch_links(name) - self.links_by_normalized_name[name] = links + if plink.scheme: + # full URL kept as-is + url = link - packages = list(PypiPackage.packages_from_many_paths_or_urls(paths_or_urls=links)) - self.packages_by_normalized_name[name] = packages + if plink.path.startswith("/"): + # absolute link + url = f"{base_url}{link}" - for p in packages: - name = name and NameVer.normalize_name(p.name) - self.packages_by_normalized_name_version[(name, p.version)] = p + else: + # relative link + url = f"{links_url}/{link}" - def get_links(self, name, *args, **kwargs): - name = name and NameVer.normalize_name(name) - self._populate_links_and_packages(name) - return self.links_by_normalized_name.get(name, []) + if TRACE_DEEP: + print(f"Adding URL: {url}") - def get_versions(self, name): - name = name and NameVer.normalize_name(name) - self._populate_links_and_packages(name) - return self.packages_by_normalized_name.get(name, []) + links.append(url) - def get_latest_version(self, name): - return 
PypiPackage.get_latest_version(name, self.get_versions(name)) + if TRACE: + print(f"Found {len(links)} links at {links_url}") + _CACHE.extend(links) + return links - def get_package(self, name, version): - return PypiPackage.get_name_version(name, version, self.get_versions(name)) + @classmethod + def from_url(cls, url=ABOUT_BASE_URL, _LINKS_REPO={}, use_cached_index=False): + if url not in _LINKS_REPO: + _LINKS_REPO[url] = cls(url=url, use_cached_index=use_cached_index) + return _LINKS_REPO[url] ################################################################################ # Globals for remote repos to be lazily created and cached on first use for the @@ -1778,51 +1801,7 @@ def get_local_packages(directory=THIRDPARTY_DIR): Return the list of all PypiPackage objects built from a local directory. Return an empty list if the package cannot be found. """ - return list(PypiPackage.packages_from_one_path_or_url(path_or_url=directory)) - - -def get_local_repo(directory=THIRDPARTY_DIR): - return LinksRepository(path_or_url=directory) - - -_REMOTE_REPO = None - - -def get_remote_repo(remote_links_url=REMOTE_LINKS_URL): - global _REMOTE_REPO - if not _REMOTE_REPO: - _REMOTE_REPO = LinksRepository(path_or_url=remote_links_url) - return _REMOTE_REPO - - -def get_remote_package(name, version, remote_links_url=REMOTE_LINKS_URL): - """ - Return a PypiPackage or None. - """ - try: - return get_remote_repo(remote_links_url).get_package(name, version) - except RemoteNotFetchedException as e: - print(f'Failed to fetch remote package info: {e}') - - -_PYPI_REPO = None - - -def get_pypi_repo(pypi_simple_url=PYPI_SIMPLE_URL): - global _PYPI_REPO - if not _PYPI_REPO: - _PYPI_REPO = PypiRepository(simple_url=pypi_simple_url) - return _PYPI_REPO - - -def get_pypi_package(name, version, pypi_simple_url=PYPI_SIMPLE_URL): - """ - Return a PypiPackage or None. 
- """ - try: - return get_pypi_repo(pypi_simple_url).get_package(name, version) - except RemoteNotFetchedException as e: - print(f'Failed to fetch remote package info: {e}') + return list(PypiPackage.packages_from_dir(directory=directory)) ################################################################################ # @@ -1843,34 +1822,31 @@ class Cache: def __attrs_post_init__(self): os.makedirs(self.directory, exist_ok=True) - def clear(self): - shutil.rmtree(self.directory) - - def get(self, path_or_url, as_text=True): + def get(self, path_or_url, as_text=True, force=False): """ - Get a file from a `path_or_url` through the cache. - `path_or_url` can be a path or a URL to a file. + Return the content fetched from a ``path_or_url`` through the cache. + Raise an Exception on errors. Treats the content as text if as_text is + True otherwise as treat as binary. `path_or_url` can be a path or a URL + to a file. """ - filename = os.path.basename(path_or_url.strip('/')) - cached = os.path.join(self.directory, filename) + cache_key = quote_plus(path_or_url.strip("/")) + cached = os.path.join(self.directory, cache_key) - if not os.path.exists(cached): + if force or not os.path.exists(cached): + if TRACE_DEEP: + print(f" FILE CACHE MISS: {path_or_url}") content = get_file_content(path_or_url=path_or_url, as_text=as_text) - wmode = 'w' if as_text else 'wb' + wmode = "w" if as_text else "wb" with open(cached, wmode) as fo: fo.write(content) return content else: + if TRACE_DEEP: + print(f" FILE CACHE HIT: {path_or_url}") return get_local_file_content(path=cached, as_text=as_text) - def put(self, filename, content): - """ - Put in the cache the `content` of `filename`. 
- """ - cached = os.path.join(self.directory, filename) - wmode = 'wb' if isinstance(content, bytes) else 'w' - with open(cached, wmode) as fo: - fo.write(content) + +CACHE = Cache() def get_file_content(path_or_url, as_text=True): @@ -1878,18 +1854,19 @@ def get_file_content(path_or_url, as_text=True): Fetch and return the content at `path_or_url` from either a local path or a remote URL. Return the content as bytes is `as_text` is False. """ - if (path_or_url.startswith('file://') - or (path_or_url.startswith('/') and os.path.exists(path_or_url)) - ): - return get_local_file_content(path=path_or_url, as_text=as_text) - - elif path_or_url.startswith('https://'): - if TRACE: print(f'Fetching: {path_or_url}') + if path_or_url.startswith("https://"): + if TRACE_DEEP: + print(f"Fetching: {path_or_url}") _headers, content = get_remote_file_content(url=path_or_url, as_text=as_text) return content + elif path_or_url.startswith("file://") or ( + path_or_url.startswith("/") and os.path.exists(path_or_url) + ): + return get_local_file_content(path=path_or_url, as_text=as_text) + else: - raise Exception(f'Unsupported URL scheme: {path_or_url}') + raise Exception(f"Unsupported URL scheme: {path_or_url}") def get_local_file_content(path, as_text=True): @@ -1897,10 +1874,10 @@ def get_local_file_content(path, as_text=True): Return the content at `url` as text. Return the content as bytes is `as_text` is False. """ - if path.startswith('file://'): + if path.startswith("file://"): path = path[7:] - mode = 'r' if as_text else 'rb' + mode = "r" if as_text else "rb" with open(path, mode) as fo: return fo.read() @@ -1909,7 +1886,13 @@ class RemoteNotFetchedException(Exception): pass -def get_remote_file_content(url, as_text=True, headers_only=False, headers=None, _delay=0,): +def get_remote_file_content( + url, + as_text=True, + headers_only=False, + headers=None, + _delay=0, +): """ Fetch and return a tuple of (headers, content) at `url`. 
Return content as a text string if `as_text` is True. Otherwise return the content as bytes. @@ -1924,6 +1907,7 @@ def get_remote_file_content(url, as_text=True, headers_only=False, headers=None, # using a GET with stream=True ensure we get the the final header from # several redirects and that we can ignore content there. A HEAD request may # not get us this last header + print(f" DOWNLOADING: {url}") with requests.get(url, allow_redirects=True, stream=True, headers=headers) as response: status = response.status_code if status != requests.codes.ok: # NOQA @@ -1939,7 +1923,7 @@ def get_remote_file_content(url, as_text=True, headers_only=False, headers=None, ) else: - raise RemoteNotFetchedException(f'Failed HTTP request from {url} with {status}') + raise RemoteNotFetchedException(f"Failed HTTP request from {url} with {status}") if headers_only: return response.headers, None @@ -1947,465 +1931,53 @@ def get_remote_file_content(url, as_text=True, headers_only=False, headers=None, return response.headers, response.text if as_text else response.content -def get_url_content_if_modified(url, md5, _delay=0,): - """ - Return fetched content bytes at `url` or None if the md5 has not changed. - Retries multiple times to fetch if there is a HTTP 429 throttling response - and this with an increasing delay. - """ - time.sleep(_delay) - headers = None - if md5: - etag = f'"{md5}"' - headers = {'If-None-Match': f'{etag}'} - - # using a GET with stream=True ensure we get the the final header from - # several redirects and that we can ignore content there. 
A HEAD request may - # not get us this last header - with requests.get(url, allow_redirects=True, stream=True, headers=headers) as response: - status = response.status_code - if status == requests.codes.too_many_requests and _delay < 20: # NOQA - # too many requests: start waiting with some exponential delay - _delay = (_delay * 2) or 1 - return get_url_content_if_modified(url=url, md5=md5, _delay=_delay) - - elif status == requests.codes.not_modified: # NOQA - # all is well, the md5 is the same - return None - - elif status != requests.codes.ok: # NOQA - raise RemoteNotFetchedException(f'Failed HTTP request from {url} with {status}') - - return response.content - - -def get_remote_headers(url): - """ - Fetch and return a mapping of HTTP headers of `url`. - """ - headers, _content = get_remote_file_content(url, headers_only=True) - return headers - - -def fetch_and_save_filename_from_paths_or_urls( +def fetch_and_save( + path_or_url, + dest_dir, filename, - paths_or_urls, - dest_dir=THIRDPARTY_DIR, as_text=True, ): """ - Return the content from fetching the `filename` file name found in the - `paths_or_urls` list of URLs or paths and save to `dest_dir`. Raise an - Exception on errors. Treats the content as text if `as_text` is True - otherwise as binary. + Fetch content at ``path_or_url`` URL or path and save this to + ``dest_dir/filername``. Return the fetched content. Raise an Exception on + errors. Treats the content as text if as_text is True otherwise as treat as + binary. """ - path_or_url = get_link_for_filename( - filename=filename, - paths_or_urls=paths_or_urls, - ) - - return fetch_and_save_path_or_url( - filename=filename, - dest_dir=dest_dir, + content = CACHE.get( path_or_url=path_or_url, as_text=as_text, ) - - -def fetch_content_from_path_or_url_through_cache(path_or_url, as_text=True, cache=Cache()): - """ - Return the content from fetching at path or URL. Raise an Exception on - errors. 
Treats the content as text if as_text is True otherwise as treat as - binary. Use the provided file cache. This is the main entry for using the - cache. - - Note: the `cache` argument is a global, though it does not really matter - since it does not hold any state which is only kept on disk. - """ - if cache: - return cache.get(path_or_url=path_or_url, as_text=as_text) - else: - return get_file_content(path_or_url=path_or_url, as_text=as_text) - - -def fetch_and_save_path_or_url(filename, dest_dir, path_or_url, as_text=True, through_cache=True): - """ - Return the content from fetching the `filename` file name at URL or path - and save to `dest_dir`. Raise an Exception on errors. Treats the content as - text if as_text is True otherwise as treat as binary. - """ - if through_cache: - content = fetch_content_from_path_or_url_through_cache(path_or_url, as_text) - else: - content = fetch_content_from_path_or_url_through_cache(path_or_url, as_text, cache=None) - output = os.path.join(dest_dir, filename) - wmode = 'w' if as_text else 'wb' + wmode = "w" if as_text else "wb" with open(output, wmode) as fo: fo.write(content) return content ################################################################################ # -# Sync and fix local thirdparty directory for various issues and gaps +# Functions to update or fetch ABOUT and license files # ################################################################################ -def fetch_missing_sources(dest_dir=THIRDPARTY_DIR): - """ - Given a thirdparty dir, fetch missing source distributions from our remote - repo or PyPI. 
Return a list of (name, version) tuples for source - distribution that were not found - """ - not_found = [] - local_packages = get_local_packages(directory=dest_dir) - remote_repo = get_remote_repo() - pypi_repo = get_pypi_repo() - - for package in local_packages: - if not package.sdist: - print(f'Finding sources for: {package.name}=={package.version}: ', end='') - try: - pypi_package = pypi_repo.get_package( - name=package.name, version=package.version) - - if pypi_package and pypi_package.sdist: - print(f'Fetching sources from Pypi') - pypi_package.fetch_sdist(dest_dir=dest_dir) - continue - else: - remote_package = remote_repo.get_package( - name=package.name, version=package.version) - - if remote_package and remote_package.sdist: - print(f'Fetching sources from Remote') - remote_package.fetch_sdist(dest_dir=dest_dir) - continue - - except RemoteNotFetchedException as e: - print(f'Failed to fetch remote package info: {e}') - - print(f'No sources found') - not_found.append((package.name, package.version,)) - - return not_found - - -def fetch_missing_wheels( - python_versions=PYTHON_VERSIONS, - operating_systems=PLATFORMS_BY_OS, +def clean_about_files( dest_dir=THIRDPARTY_DIR, ): """ - Given a thirdparty dir fetch missing wheels for all known combos of Python - versions and OS. Return a list of tuple (Package, Environment) for wheels - that were not found locally or remotely. 
+ Given a thirdparty dir, clean ABOUT files """ local_packages = get_local_packages(directory=dest_dir) - evts = itertools.product(python_versions, operating_systems) - environments = [Environment.from_pyver_and_os(pyv, os) for pyv, os in evts] - packages_and_envts = itertools.product(local_packages, environments) - - not_fetched = [] - fetched_filenames = set() - for package, envt in packages_and_envts: - - filename = package.fetch_wheel( - environment=envt, - fetched_filenames=fetched_filenames, - dest_dir=dest_dir, - ) - - if filename: - fetched_filenames.add(filename) - else: - not_fetched.append((package, envt,)) - - return not_fetched - - -def build_missing_wheels( - packages_and_envts, - build_remotely=False, - with_deps=False, - dest_dir=THIRDPARTY_DIR, -): - """ - Build all wheels in a list of tuple (Package, Environment) and save in - `dest_dir`. Return a list of tuple (Package, Environment), and a list of - built wheel filenames. - """ - - not_built = [] - built_filenames = [] - - packages_and_envts = itertools.groupby( - sorted(packages_and_envts), key=operator.itemgetter(0)) - - for package, pkg_envts in packages_and_envts: - - envts = [envt for _pkg, envt in pkg_envts] - python_versions = sorted(set(e.python_version for e in envts)) - operating_systems = sorted(set(e.operating_system for e in envts)) - built = None - try: - built = build_wheels( - requirements_specifier=package.specifier, - with_deps=with_deps, - build_remotely=build_remotely, - python_versions=python_versions, - operating_systems=operating_systems, - verbose=False, - dest_dir=dest_dir, - ) - print('.') - except Exception as e: - import traceback - print('#############################################################') - print('############# WHEEL BUILD FAILED ######################') - traceback.print_exc() - print() - print('#############################################################') - - if not built: - for envt in pkg_envts: - not_built.append((package, envt)) - else: - for bfn 
in built: - print(f' --> Built wheel: {bfn}') - built_filenames.append(bfn) - - return not_built, built_filenames - -################################################################################ -# -# Functions to handle remote or local repo used to "find-links" -# -################################################################################ - - -def get_paths_or_urls(links_url): - if links_url.startswith('https:'): - paths_or_urls = find_links_from_release_url(links_url) - else: - paths_or_urls = find_links_from_dir(links_url) - return paths_or_urls - - -def find_links_from_dir(directory=THIRDPARTY_DIR): - """ - Return a list of path to files in `directory` for any file that ends with - any of the extension in the list of `extensions` strings. - """ - base = os.path.abspath(directory) - files = [os.path.join(base, f) for f in os.listdir(base) if f.endswith(EXTENSIONS)] - return files - - -get_links = re.compile('href="([^"]+)"').findall - - -def find_links_from_release_url(links_url=REMOTE_LINKS_URL): - """ - Return a list of download link URLs found in the HTML page at `links_url` - URL that starts with the `prefix` string and ends with any of the extension - in the list of `extensions` strings. Use the `base_url` to prefix the links. 
- """ - if TRACE: print(f'Finding links for {links_url}') - - plinks_url = urllib.parse.urlparse(links_url) - - base_url = urllib.parse.SplitResult( - plinks_url.scheme, plinks_url.netloc, '', '', '').geturl() - - if TRACE: print(f'Base URL {base_url}') - - _headers, text = get_remote_file_content(links_url) - links = [] - for link in get_links(text): - if not link.endswith(EXTENSIONS): - continue - - plink = urllib.parse.urlsplit(link) - - if plink.scheme: - # full URL kept as-is - url = link - - if plink.path.startswith('/'): - # absolute link - url = f'{base_url}{link}' - - else: - # relative link - url = f'{links_url}/{link}' - - if TRACE: print(f'Adding URL: {url}') - - links.append(url) - - if TRACE: print(f'Found {len(links)} links at {links_url}') - return links - - -def find_pypi_links(name, simple_url=PYPI_SIMPLE_URL): - """ - Return a list of download link URLs found in a PyPI simple index for package name. - with the list of `extensions` strings. Use the `simple_url` PyPI url. - """ - if TRACE: print(f'Finding links for {simple_url}') - - name = name and NameVer.normalize_name(name) - simple_url = simple_url.strip('/') - simple_url = f'{simple_url}/{name}' - - _headers, text = get_remote_file_content(simple_url) - links = get_links(text) - # TODO: keep sha256 - links = [l.partition('#sha256=') for l in links] - links = [url for url, _, _sha256 in links] - links = [l for l in links if l.endswith(EXTENSIONS)] - return links - - -def get_link_for_filename(filename, paths_or_urls): - """ - Return a link for `filename` found in the `links` list of URLs or paths. Raise an - exception if no link is found or if there are more than one link for that - file name. 
- """ - path_or_url = [l for l in paths_or_urls if l.endswith(f'/{filename}')] - if not path_or_url: - raise Exception(f'Missing link to file: {filename}') - if not len(path_or_url) == 1: - raise Exception(f'Multiple links to file: {filename}: \n' + '\n'.join(path_or_url)) - return path_or_url[0] - -################################################################################ -# -# Requirements processing -# -################################################################################ - - -class MissingRequirementException(Exception): - pass - - -def get_required_packages(required_name_versions): - """ - Return a tuple of (remote packages, PyPI packages) where each is a mapping - of {(name, version): PypiPackage} for packages listed in the - `required_name_versions` list of (name, version) tuples. Raise a - MissingRequirementException with a list of missing (name, version) if a - requirement cannot be satisfied remotely or in PyPI. - """ - remote_repo = get_remote_repo() - - remote_packages = {(name, version): remote_repo.get_package(name, version) - for name, version in required_name_versions} - - pypi_repo = get_pypi_repo() - pypi_packages = {(name, version): pypi_repo.get_package(name, version) - for name, version in required_name_versions} - - # remove any empty package (e.g. 
that do not exist in some place) - remote_packages = {nv: p for nv, p in remote_packages.items() if p} - pypi_packages = {nv: p for nv, p in pypi_packages.items() if p} - - # check that we are not missing any - repos_name_versions = set(remote_packages.keys()) | set(pypi_packages.keys()) - missing_name_versions = required_name_versions.difference(repos_name_versions) - if missing_name_versions: - raise MissingRequirementException(sorted(missing_name_versions)) - - return remote_packages, pypi_packages - - -def get_required_remote_packages( - requirements_file='requirements.txt', - force_pinned=True, - remote_links_url=REMOTE_LINKS_URL, -): - """ - Yield tuple of (name, version, PypiPackage) for packages listed in the - `requirements_file` requirements file and found in the PyPI-like link repo - ``remote_links_url`` if this is a URL. Treat this ``remote_links_url`` as a - local directory path to a wheels directory if this is not a a URL. - """ - required_name_versions = load_requirements( - requirements_file=requirements_file, - force_pinned=force_pinned, - ) - - if remote_links_url.startswith('https://'): - repo = get_remote_repo(remote_links_url=remote_links_url) - else: - # a local path - assert os.path.exists(remote_links_url), f'Path does not exist: {remote_links_url}' - repo = get_local_repo(directory=remote_links_url) - - for name, version in required_name_versions: - if version: - yield name, version, repo.get_package(name, version) - else: - yield name, version, repo.get_latest_version(name) - - -def update_requirements(name, version=None, requirements_file='requirements.txt'): - """ - Upgrade or add `package_name` with `new_version` to the `requirements_file` - requirements file. Write back requirements sorted with name and version - canonicalized. Note: this cannot deal with hashed or unpinned requirements. - Do nothing if the version already exists as pinned. 
- """ - normalized_name = NameVer.normalize_name(name) - - is_updated = False - updated_name_versions = [] - for existing_name, existing_version in load_requirements(requirements_file, force_pinned=False): - - existing_normalized_name = NameVer.normalize_name(existing_name) - - if normalized_name == existing_normalized_name: - if version != existing_version: - is_updated = True - updated_name_versions.append((existing_normalized_name, existing_version,)) - - if is_updated: - updated_name_versions = sorted(updated_name_versions) - nvs = '\n'.join(f'{name}=={version}' for name, version in updated_name_versions) - - with open(requirements_file, 'w') as fo: - fo.write(nvs) - - -def hash_requirements(dest_dir=THIRDPARTY_DIR, requirements_file='requirements.txt'): - """ - Hash all the requirements found in the `requirements_file` - requirements file based on distributions available in `dest_dir` - """ - local_repo = get_local_repo(directory=dest_dir) - packages_by_normalized_name_version = local_repo.packages_by_normalized_name_version - hashed = [] - for name, version in load_requirements(requirements_file, force_pinned=True): - package = packages_by_normalized_name_version.get((name, version)) - if not package: - raise Exception(f'Missing required package {name}=={version}') - hashed.append(package.specifier_with_hashes) - - with open(requirements_file, 'w') as fo: - fo.write('\n'.join(hashed)) + for local_package in local_packages: + for local_dist in local_package.get_distributions(): + local_dist.load_about_data(dest_dir=dest_dir) + local_dist.set_checksums(dest_dir=dest_dir) -################################################################################ -# -# Functions to update or fetch ABOUT and license files -# -################################################################################ + if "classifiers" in local_dist.extra_data: + local_dist.extra_data.pop("classifiers", None) + local_dist.save_about_and_notice_files(dest_dir) -def 
add_fetch_or_update_about_and_license_files(dest_dir=THIRDPARTY_DIR, include_remote=True): +def fetch_abouts_and_licenses(dest_dir=THIRDPARTY_DIR, use_cached_index=False): """ Given a thirdparty dir, add missing ABOUT. LICENSE and NOTICE files using best efforts: @@ -2415,23 +1987,24 @@ def add_fetch_or_update_about_and_license_files(dest_dir=THIRDPARTY_DIR, include - derive from existing distribution with same name and latest version that would have such ABOUT file - extract ABOUT file data from distributions PKGINFO or METADATA files - - TODO: make API calls to fetch package data from DejaCode - The process consists in load and iterate on every package distributions, - collect data and then acsk to save. + Use available existing on-disk cached index if use_cached_index is True. """ - local_packages = get_local_packages(directory=dest_dir) - local_repo = get_local_repo(directory=dest_dir) - - remote_repo = get_remote_repo() - def get_other_dists(_package, _dist): """ - Return a list of all the dists from package that are not the `dist` object + Return a list of all the dists from `_package` that are not the `_dist` + object """ return [d for d in _package.get_distributions() if d != _dist] + local_packages = get_local_packages(directory=dest_dir) + packages_by_name = defaultdict(list) + for local_package in local_packages: + distributions = list(local_package.get_distributions()) + distribution = distributions[0] + packages_by_name[distribution.name].append(local_package) + for local_package in local_packages: for local_dist in local_package.get_distributions(): local_dist.load_about_data(dest_dir=dest_dir) @@ -2440,7 +2013,7 @@ def get_other_dists(_package, _dist): # if has key data we may look to improve later, but we can move on if local_dist.has_key_metadata(): local_dist.save_about_and_notice_files(dest_dir=dest_dir) - local_dist.fetch_license_files(dest_dir=dest_dir) + local_dist.fetch_license_files(dest_dir=dest_dir, use_cached_index=use_cached_index) 
continue # lets try to get from another dist of the same local package @@ -2452,18 +2025,18 @@ def get_other_dists(_package, _dist): # if has key data we may look to improve later, but we can move on if local_dist.has_key_metadata(): local_dist.save_about_and_notice_files(dest_dir=dest_dir) - local_dist.fetch_license_files(dest_dir=dest_dir) + local_dist.fetch_license_files(dest_dir=dest_dir, use_cached_index=use_cached_index) continue - # try to get a latest version of the same package that is not our version + # try to get another version of the same package that is not our version other_local_packages = [ - p for p in local_repo.get_versions(local_package.name) + p + for p in packages_by_name[local_package.name] if p.version != local_package.version ] - - latest_local_version = other_local_packages and other_local_packages[-1] - if latest_local_version: - latest_local_dists = list(latest_local_version.get_distributions()) + other_local_version = other_local_packages and other_local_packages[-1] + if other_local_version: + latest_local_dists = list(other_local_version.get_distributions()) for latest_local_dist in latest_local_dists: latest_local_dist.load_about_data(dest_dir=dest_dir) if not latest_local_dist.has_key_metadata(): @@ -2478,45 +2051,47 @@ def get_other_dists(_package, _dist): # if has key data we may look to improve later, but we can move on if local_dist.has_key_metadata(): local_dist.save_about_and_notice_files(dest_dir=dest_dir) - local_dist.fetch_license_files(dest_dir=dest_dir) + local_dist.fetch_license_files(dest_dir=dest_dir, use_cached_index=use_cached_index) continue - if include_remote: - # lets try to fetch remotely - local_dist.load_remote_about_data() + # lets try to fetch remotely + local_dist.load_remote_about_data() + + # if has key data we may look to improve later, but we can move on + if local_dist.has_key_metadata(): + local_dist.save_about_and_notice_files(dest_dir=dest_dir) + local_dist.fetch_license_files(dest_dir=dest_dir, 
use_cached_index=use_cached_index) + continue + + # try to get a latest version of the same package that is not our version + # and that is in our self hosted repo + lpv = local_package.version + lpn = local_package.name + + other_remote_packages = [ + p for v, p in PYPI_SELFHOSTED_REPO.get_package_versions(lpn).items() if v != lpv + ] + + latest_version = other_remote_packages and other_remote_packages[-1] + if latest_version: + latest_dists = list(latest_version.get_distributions()) + for remote_dist in latest_dists: + remote_dist.load_remote_about_data() + if not remote_dist.has_key_metadata(): + # there is not much value to get other data if we are missing the key ones + continue + else: + local_dist.update_from_other_dist(remote_dist) + # if has key data we may look to improve later, but we can move on + if local_dist.has_key_metadata(): + break # if has key data we may look to improve later, but we can move on if local_dist.has_key_metadata(): local_dist.save_about_and_notice_files(dest_dir=dest_dir) - local_dist.fetch_license_files(dest_dir=dest_dir) + local_dist.fetch_license_files(dest_dir=dest_dir, use_cached_index=use_cached_index) continue - # try to get a latest version of the same package that is not our version - other_remote_packages = [ - p for p in remote_repo.get_versions(local_package.name) - if p.version != local_package.version - ] - - latest_version = other_remote_packages and other_remote_packages[-1] - if latest_version: - latest_dists = list(latest_version.get_distributions()) - for remote_dist in latest_dists: - remote_dist.load_remote_about_data() - if not remote_dist.has_key_metadata(): - # there is not much value to get other data if we are missing the key ones - continue - else: - local_dist.update_from_other_dist(remote_dist) - # if has key data we may look to improve later, but we can move on - if local_dist.has_key_metadata(): - break - - # if has key data we may look to improve later, but we can move on - if 
local_dist.has_key_metadata(): - local_dist.save_about_and_notice_files(dest_dir=dest_dir) - local_dist.fetch_license_files(dest_dir=dest_dir) - continue - # try to get data from pkginfo (no license though) local_dist.load_pkginfo_data(dest_dir=dest_dir) @@ -2524,15 +2099,13 @@ def get_other_dists(_package, _dist): # if local_dist.has_key_metadata() or not local_dist.has_key_metadata(): local_dist.save_about_and_notice_files(dest_dir) - lic_errs = local_dist.fetch_license_files(dest_dir) - - # TODO: try to get data from dejacode + lic_errs = local_dist.fetch_license_files(dest_dir, use_cached_index=use_cached_index) if not local_dist.has_key_metadata(): - print(f'Unable to add essential ABOUT data for: {local_dist}') + print(f"Unable to add essential ABOUT data for: {local_dist}") if lic_errs: - lic_errs = '\n'.join(lic_errs) - print(f'Failed to fetch some licenses:: {lic_errs}') + lic_errs = "\n".join(lic_errs) + print(f"Failed to fetch some licenses:: {lic_errs}") ################################################################################ # @@ -2541,397 +2114,116 @@ def get_other_dists(_package, _dist): ################################################################################ -def call(args): +def call(args, verbose=TRACE): """ - Call args in a subprocess and display output on the fly. - Return or raise stdout, stderr, returncode + Call args in a subprocess and display output on the fly if ``trace`` is True. 
+ Return a tuple of (returncode, stdout, stderr) """ - if TRACE: print('Calling:', ' '.join(args)) + if TRACE_DEEP: + print("Calling:", " ".join(args)) with subprocess.Popen( - args, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - encoding='utf-8' + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8" ) as process: + stdouts = [] while True: line = process.stdout.readline() if not line and process.poll() is not None: break - if TRACE: print(line.rstrip(), flush=True) + stdouts.append(line) + if verbose: + print(line.rstrip(), flush=True) stdout, stderr = process.communicate() - returncode = process.returncode - if returncode == 0: - return returncode, stdout, stderr - else: - raise Exception(returncode, stdout, stderr) - - -def add_or_upgrade_built_wheels( - name, - version=None, - python_versions=PYTHON_VERSIONS, - operating_systems=PLATFORMS_BY_OS, - dest_dir=THIRDPARTY_DIR, - build_remotely=False, - with_deps=False, - verbose=False, -): - """ - Add or update package `name` and `version` as a binary wheel saved in - `dest_dir`. Use the latest version if `version` is None. Return the a list - of the collected, fetched or built wheel file names or an empty list. - - Use the provided lists of `python_versions` (e.g. "36", "39") and - `operating_systems` (e.g. linux, windows or macos) to decide which specific - wheel to fetch or build. - - Include wheels for all dependencies if `with_deps` is True. - Build remotely is `build_remotely` is True. 
- """ - assert name, 'Name is required' - ver = version and f'=={version}' or '' - print(f'\nAdding wheels for package: {name}{ver}') - - wheel_filenames = [] - # a mapping of {req specifier: {mapping build_wheels kwargs}} - wheels_to_build = {} - for python_version, operating_system in itertools.product(python_versions, operating_systems): - print(f' Adding wheels for package: {name}{ver} on {python_version,} and {operating_system}') - environment = Environment.from_pyver_and_os(python_version, operating_system) - - # Check if requested wheel already exists locally for this version - local_repo = get_local_repo(directory=dest_dir) - local_package = local_repo.get_package(name=name, version=version) - - has_local_wheel = False - if version and local_package: - for wheel in local_package.get_supported_wheels(environment): - has_local_wheel = True - wheel_filenames.append(wheel.filename) - break - if has_local_wheel: - print(f' local wheel exists: {wheel.filename}') - continue - - if not version: - pypi_package = get_pypi_repo().get_latest_version(name) - version = pypi_package.version - - # Check if requested wheel already exists remotely or in Pypi for this version - wheel_filename = fetch_package_wheel( - name=name, version=version, environment=environment, dest_dir=dest_dir) - if wheel_filename: - wheel_filenames.append(wheel_filename) - - # the wheel is not available locally, remotely or in Pypi - # we need to build binary from sources - requirements_specifier = f'{name}=={version}' - to_build = wheels_to_build.get(requirements_specifier) - if to_build: - to_build['python_versions'].append(python_version) - to_build['operating_systems'].append(operating_system) - else: - wheels_to_build[requirements_specifier] = dict( - requirements_specifier=requirements_specifier, - python_versions=[python_version], - operating_systems=[operating_system], - dest_dir=dest_dir, - build_remotely=build_remotely, - with_deps=with_deps, - verbose=verbose, - ) - - for 
build_wheels_kwargs in wheels_to_build.values(): - bwheel_filenames = build_wheels(**build_wheels_kwargs) - wheel_filenames.extend(bwheel_filenames) - - return sorted(set(wheel_filenames)) - - -def build_wheels( - requirements_specifier, - python_versions=PYTHON_VERSIONS, - operating_systems=PLATFORMS_BY_OS, - dest_dir=THIRDPARTY_DIR, - build_remotely=False, - with_deps=False, - verbose=False, -): - """ - Given a pip `requirements_specifier` string (such as package names or as - name==version), build the corresponding binary wheel(s) for all - `python_versions` and `operating_systems` combinations and save them - back in `dest_dir` and return a list of built wheel file names. - - Include wheels for all dependencies if `with_deps` is True. - - First try to build locally to process pure Python wheels, and fall back to - build remotey on all requested Pythons and operating systems. - """ - all_pure, builds = build_wheels_locally_if_pure_python( - requirements_specifier=requirements_specifier, - with_deps=with_deps, - verbose=verbose, - dest_dir=dest_dir, - ) - for local_build in builds: - print(f'Built wheel: {local_build}') - - if all_pure: - return builds - - if build_remotely: - remote_builds = build_wheels_remotely_on_multiple_platforms( - requirements_specifier=requirements_specifier, - with_deps=with_deps, - python_versions=python_versions, - operating_systems=operating_systems, - verbose=verbose, - dest_dir=dest_dir, - ) - builds.extend(remote_builds) + if not stdout.strip(): + stdout = "\n".join(stdouts) + return process.returncode, stdout, stderr - return builds - -def build_wheels_remotely_on_multiple_platforms( - requirements_specifier, - with_deps=False, - python_versions=PYTHON_VERSIONS, - operating_systems=PLATFORMS_BY_OS, - verbose=False, - dest_dir=THIRDPARTY_DIR, -): - """ - Given pip `requirements_specifier` string (such as package names or as - name==version), build the corresponding binary wheel(s) including wheels for - all dependencies for all 
`python_versions` and `operating_systems` - combinations and save them back in `dest_dir` and return a list of built - wheel file names. - """ - check_romp_is_configured() - pyos_options = get_romp_pyos_options(python_versions, operating_systems) - deps = '' if with_deps else '--no-deps' - verbose = '--verbose' if verbose else '' - - romp_args = ([ - 'romp', - '--interpreter', 'cpython', - '--architecture', 'x86_64', - '--check-period', '5', # in seconds - - ] + pyos_options + [ - - '--artifact-paths', '*.whl', - '--artifact', 'artifacts.tar.gz', - '--command', - # create a virtualenv, upgrade pip -# f'python -m ensurepip --user --upgrade; ' - f'python -m pip {verbose} install --user --upgrade pip setuptools wheel; ' - f'python -m pip {verbose} wheel {deps} {requirements_specifier}', - ]) - - if verbose: - romp_args.append('--verbose') - - print(f'Building wheels for: {requirements_specifier}') - print(f'Using command:', ' '.join(romp_args)) - call(romp_args) - - wheel_filenames = extract_tar('artifacts.tar.gz', dest_dir) - for wfn in wheel_filenames: - print(f' built wheel: {wfn}') - return wheel_filenames - - -def get_romp_pyos_options( - python_versions=PYTHON_VERSIONS, - operating_systems=PLATFORMS_BY_OS, -): - """ - Return a list of CLI options for romp - For example: - >>> expected = ['--version', '3.6', '--version', '3.7', '--version', '3.8', - ... '--version', '3.9', '--platform', 'linux', '--platform', 'macos', - ... 
'--platform', 'windows'] - >>> assert get_romp_pyos_options() == expected - """ - python_dot_versions = ['.'.join(pv) for pv in sorted(set(python_versions))] - pyos_options = list(itertools.chain.from_iterable( - ('--version', ver) for ver in python_dot_versions)) - - pyos_options += list(itertools.chain.from_iterable( - ('--platform' , plat) for plat in sorted(set(operating_systems)))) - - return pyos_options - - -def check_romp_is_configured(): - # these environment variable must be set before - has_envt = ( - os.environ.get('ROMP_BUILD_REQUEST_URL') and - os.environ.get('ROMP_DEFINITION_ID') and - os.environ.get('ROMP_PERSONAL_ACCESS_TOKEN') and - os.environ.get('ROMP_USERNAME') - ) - - if not has_envt: - raise Exception( - 'ROMP_BUILD_REQUEST_URL, ROMP_DEFINITION_ID, ' - 'ROMP_PERSONAL_ACCESS_TOKEN and ROMP_USERNAME ' - 'are required enironment variables.') - - -def build_wheels_locally_if_pure_python( - requirements_specifier, - with_deps=False, - verbose=False, +def download_wheels_with_pip( + requirements_specifiers=tuple(), + requirements_files=tuple(), + environment=None, dest_dir=THIRDPARTY_DIR, + index_url=PYPI_SIMPLE_URL, + links_url=ABOUT_LINKS_URL, ): """ - Given pip `requirements_specifier` string (such as package names or as - name==version), build the corresponding binary wheel(s) locally. - - If all these are "pure" Python wheels that run on all Python 3 versions and - operating systems, copy them back in `dest_dir` if they do not exists there - - Return a tuple of (True if all wheels are "pure", list of built wheel file names) + Fetch binary wheel(s) using pip for the ``envt`` Environment given a list of + pip ``requirements_files`` and a list of ``requirements_specifiers`` string + (such as package names or as name==version). + Return a tuple of (list of downloaded files, error string). + Do NOT fail on errors, but return an error message on failure. 
""" - deps = [] if with_deps else ['--no-deps'] - verbose = ['--verbose'] if verbose else [] - wheel_dir = tempfile.mkdtemp(prefix='scancode-release-wheels-local-') cli_args = [ - 'pip', 'wheel', - '--wheel-dir', wheel_dir, - ] + deps + verbose + [ - requirements_specifier - ] - - print(f'Building local wheels for: {requirements_specifier}') - print(f'Using command:', ' '.join(cli_args)) - call(cli_args) - - built = os.listdir(wheel_dir) - if not built: - return [] - - all_pure = all(is_pure_wheel(bwfn) for bwfn in built) - - if not all_pure: - print(f' Some wheels are not pure') - - print(f' Copying local wheels') - pure_built = [] - for bwfn in built: - owfn = os.path.join(dest_dir, bwfn) - if not os.path.exists(owfn): - nwfn = os.path.join(wheel_dir, bwfn) - fileutils.copyfile(nwfn, owfn) - pure_built.append(bwfn) - print(f' Built local wheel: {bwfn}') - return all_pure, pure_built - - -# TODO: Use me -def optimize_wheel(wheel_filename, dest_dir=THIRDPARTY_DIR): - """ - Optimize a wheel named `wheel_filename` in `dest_dir` such as renaming its - tags for PyPI compatibility and making it smaller if possible. Return the - name of the new wheel if renamed or the existing new name otherwise. 
- """ - if is_pure_wheel(wheel_filename): - print(f'Pure wheel: {wheel_filename}, nothing to do.') - return wheel_filename - - original_wheel_loc = os.path.join(dest_dir, wheel_filename) - wheel_dir = tempfile.mkdtemp(prefix='scancode-release-wheels-') - awargs = [ - 'auditwheel', - 'addtag', - '--wheel-dir', wheel_dir, - original_wheel_loc + "pip", + "download", + "--only-binary", + ":all:", + "--dest", + dest_dir, + "--index-url", + index_url, + "--find-links", + links_url, + "--no-color", + "--progress-bar", + "off", + "--no-deps", + "--no-build-isolation", + "--verbose", + # "--verbose", ] - call(awargs) - - audited = os.listdir(wheel_dir) - if not audited: - # cannot optimize wheel - return wheel_filename - - assert len(audited) == 1 - new_wheel_name = audited[0] - - new_wheel_loc = os.path.join(wheel_dir, new_wheel_name) - - # this needs to go now - os.remove(original_wheel_loc) - - if new_wheel_name == wheel_filename: - os.rename(new_wheel_loc, original_wheel_loc) - return wheel_filename - - new_wheel = Wheel.from_filename(new_wheel_name) - non_pypi_plats = utils_pypi_supported_tags.validate_platforms_for_pypi(new_wheel.platforms) - new_wheel.platforms = [p for p in new_wheel.platforms if p not in non_pypi_plats] - if not new_wheel.platforms: - print(f'Cannot make wheel PyPI compatible: {original_wheel_loc}') - os.rename(new_wheel_loc, original_wheel_loc) - return wheel_filename - - new_wheel_cleaned_filename = new_wheel.to_filename() - new_wheel_cleaned_loc = os.path.join(dest_dir, new_wheel_cleaned_filename) - os.rename(new_wheel_loc, new_wheel_cleaned_loc) - return new_wheel_cleaned_filename + if environment: + eopts = environment.get_pip_cli_options() + cli_args.extend(eopts) + else: + print("WARNING: no download environment provided.") -def extract_tar(location, dest_dir=THIRDPARTY_DIR,): - """ - Extract a tar archive at `location` in the `dest_dir` directory. Return a - list of extracted locations (either directories or files). 
- """ - with open(location, 'rb') as fi: - with tarfile.open(fileobj=fi) as tar: - members = list(tar.getmembers()) - tar.extractall(dest_dir, members=members) + cli_args.extend(requirements_specifiers) + for req_file in requirements_files: + cli_args.extend(["--requirement", req_file]) - return [os.path.basename(ti.name) for ti in members - if ti.type == tarfile.REGTYPE] + if TRACE: + print(f"Downloading wheels using command:", " ".join(cli_args)) + existing = set(os.listdir(dest_dir)) + error = False + try: + returncode, _stdout, stderr = call(cli_args, verbose=True) + if returncode != 0: + error = stderr + except Exception as e: + error = str(e) -def fetch_package_wheel(name, version, environment, dest_dir=THIRDPARTY_DIR): - """ - Fetch the binary wheel for package `name` and `version` and save in - `dest_dir`. Use the provided `environment` Environment to determine which - specific wheel to fetch. + if error: + print() + print("###########################################################################") + print("##################### Failed to fetch all wheels ##########################") + print("###########################################################################") + print(error) + print() + print("###########################################################################") - Return the fetched wheel file name on success or None if it was not fetched. - Trying fetching from our own remote repo, then from PyPI. 
- """ - wheel_filename = None - remote_package = get_remote_package(name=name, version=version) - if remote_package: - wheel_filename = remote_package.fetch_wheel( - environment=environment, dest_dir=dest_dir) - if wheel_filename: - return wheel_filename + downloaded = existing ^ set(os.listdir(dest_dir)) + return sorted(downloaded), error - pypi_package = get_pypi_package(name=name, version=version) - if pypi_package: - wheel_filename = pypi_package.fetch_wheel( - environment=environment, dest_dir=dest_dir) - return wheel_filename +################################################################################ +# +# Functions to check for problems +# +################################################################################ def check_about(dest_dir=THIRDPARTY_DIR): try: - subprocess.check_output(f'venv/bin/about check {dest_dir}'.split()) + subprocess.check_output(f"venv/bin/about check {dest_dir}".split()) except subprocess.CalledProcessError as cpe: print() - print('Invalid ABOUT files:') - print(cpe.output.decode('utf-8', errors='replace')) + print("Invalid ABOUT files:") + print(cpe.output.decode("utf-8", errors="replace")) def find_problems( @@ -2947,32 +2239,36 @@ def find_problems( for package in local_packages: if report_missing_sources and not package.sdist: - print(f'{package.name}=={package.version}: Missing source distribution.') + print(f"{package.name}=={package.version}: Missing source distribution.") if report_missing_wheels and not package.wheels: - print(f'{package.name}=={package.version}: Missing wheels.') + print(f"{package.name}=={package.version}: Missing wheels.") for dist in package.get_distributions(): dist.load_about_data(dest_dir=dest_dir) abpth = os.path.abspath(os.path.join(dest_dir, dist.about_filename)) if not dist.has_key_metadata(): - print(f' Missing key ABOUT data in file://{abpth}') - if 'classifiers' in dist.extra_data: - print(f' Dangling classifiers data in file://{abpth}') + print(f" Missing key ABOUT data in 
file://{abpth}") + if "classifiers" in dist.extra_data: + print(f" Dangling classifiers data in file://{abpth}") if not dist.validate_checksums(dest_dir): - print(f' Invalid checksums in file://{abpth}') + print(f" Invalid checksums in file://{abpth}") if not dist.sha1 and dist.md5: - print(f' Missing checksums in file://{abpth}') + print(f" Missing checksums in file://{abpth}") check_about(dest_dir=dest_dir) def compute_normalized_license_expression(declared_licenses): + """ + Return a normalized license expression or None. + """ if not declared_licenses: return try: from packagedcode import pypi + return pypi.compute_normalized_license(declared_licenses) except ImportError: # Scancode is not installed, clean and join all the licenses lics = [python_safe_name(l).lower() for l in declared_licenses] - return ' AND '.join(lics).lower() + return " AND ".join(lics).lower() diff --git a/pyproject.toml b/pyproject.toml index 1e10f32..cde7907 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,9 @@ norecursedirs = [ "tmp", "venv", "tests/data", - ".eggs" + ".eggs", + "src/*/data", + "tests/*/data" ] python_files = "*.py" diff --git a/requirements-dev.txt b/requirements-dev.txt index 4dcff74..fe92ed8 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,9 +1,24 @@ +aboutcode-toolkit==7.0.1 +bleach==4.1.0 +build==0.7.0 +commonmark==0.9.1 +docutils==0.18.1 +et-xmlfile==1.1.0 execnet==1.9.0 iniconfig==1.1.1 -packaging==21.0 -py==1.10.0 -pyparsing==2.4.7 -pytest==6.2.5 -pytest-forked==1.3.0 -pytest-xdist==2.4.0 -toml==0.10.2 \ No newline at end of file +jeepney==0.7.1 +keyring==23.4.1 +openpyxl==3.0.9 +pep517==0.12.0 +pkginfo==1.8.2 +py==1.11.0 +pytest==7.0.1 +pytest-forked==1.4.0 +pytest-xdist==2.5.0 +readme-renderer==34.0 +requests-toolbelt==0.9.1 +rfc3986==1.5.0 +rich==12.3.0 +secretstorage==3.3.2 +tomli==1.2.3 +twine==3.8.0 diff --git a/requirements.txt b/requirements.txt index c5d3aad..627778d 100644 --- a/requirements.txt +++ b/requirements.txt @@ 
-1,36 +1,79 @@ -attrs==21.2.0 -beautifulsoup4==4.10.0 +attrs==21.4.0 +banal==1.0.6 +beautifulsoup4==4.11.1 binaryornot==0.4.4 -certifi==2021.5.30 -cffi==1.14.6 +boolean.py==3.8 +certifi==2021.10.8 +cffi==1.15.0 chardet==4.0.0 -charset-normalizer==2.0.6 -click==8.0.1 -commoncode==30.0.0 -cryptography==35.0.0 +charset-normalizer==2.0.12 +click==8.0.4 +colorama==0.4.4 +commoncode==30.2.0 +construct==2.10.68 +container-inspector==30.0.0 +cryptography==36.0.2 +debian-inspector==30.0.0 +dockerfile-parse==1.2.0 +dparse2==0.6.1 extractcode-7z==16.5.210531 extractcode-libarchive==3.5.1.210531 -idna==3.2 -importlib-metadata==4.8.1 -intbitset==2.4.1 +fasteners==0.17.3 +fingerprints==1.0.3 +ftfy==6.0.3 +future==0.18.2 +gemfileparser==0.8.0 +html5lib==1.1 +idna==3.3 +importlib-metadata==4.8.3 +inflection==0.5.1 +intbitset==3.0.1 +isodate==0.6.1 +jaraco.functools==3.4.0 +javaproperties==0.8.1 +Jinja2==3.0.3 +jsonstreams==0.6.0 +license-expression==21.6.14 +lxml==4.8.0 +MarkupSafe==2.0.1 +more-itertools==8.13.0 +normality==2.3.3 +packagedcode-msitools==0.101.210706 +packageurl-python==0.9.9 +packaging==21.3 +parameter-expansion-patched==0.3.1 patch==1.16 -pdfminer.six==20201018 -pip==21.2.4 +pdfminer.six==20220506 +pefile==2021.9.3 +pip-requirements-parser==31.2.0 +pkginfo2==30.0.0 pluggy==1.0.0 plugincode==21.1.21 -pycparser==2.20 -PyYAML==5.4.1 -requests==2.26.0 +ply==3.11 +publicsuffix2==2.20191221 +pyahocorasick==2.0.0b1 +pycparser==2.21 +pygmars==0.7.0 +Pygments==2.12.0 +pymaven-patch==0.3.0 +pyparsing==3.0.8 +pytz==2022.1 +PyYAML==6.0 +rdflib==5.0.0 +regipy==2.2.2 +requests==2.27.1 +rpm-inspector-rpm==4.16.1.3.210404 saneyaml==0.5.2 -setuptools==58.1.0 six==1.16.0 -sortedcontainers==2.4.0 -soupsieve==2.2.1 +soupsieve==2.3.1 +spdx-tools==0.7.0a3 text-unidecode==1.3 +toml==0.10.2 typecode==21.6.1 typecode-libmagic==5.39.210531 -typing==3.6.6 -typing-extensions==3.10.0.2 -urllib3==1.26.7 -wheel==0.37.0 +urllib3==1.26.9 +urlpy==0.5 +wcwidth==0.2.5 +webencodings==0.5.1 
+xmltodict==0.12.0 zipp==3.6.0 diff --git a/setup.cfg b/setup.cfg index 074ef86..e4eeb8e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,25 +1,29 @@ [metadata] -license_files = - apache-2.0.LICENSE - NOTICE - AUTHORS.rst - CHANGELOG.rst name = extractcode -author = nexB. Inc. and others -author_email = info@aboutcode.org license = Apache-2.0 # description must be on ONE line https://github.com/pypa/setuptools/issues/1390 description = A mostly universal archive extractor using 7zip, libarchive and the Python standard library for reliable archive extraction. long_description = file:README.rst +long_description_content_type = text/x-rst url = https://github.com/nexB/extractcode + +author = nexB. Inc. and others +author_email = info@aboutcode.org + classifiers = Development Status :: 5 - Production/Stable Intended Audience :: Developers Programming Language :: Python :: 3 Programming Language :: Python :: 3 :: Only + Programming Language :: Python :: 3.6 + Programming Language :: Python :: 3.7 + Programming Language :: Python :: 3.8 + Programming Language :: Python :: 3.9 + Programming Language :: Python :: 3.10 Topic :: Software Development Topic :: Utilities + keywords = utilities archive @@ -49,23 +53,32 @@ keywords = patch scancode-toolkit +license_files = + apache-2.0.LICENSE + NOTICE + AUTHORS.rst + CHANGELOG.rst + CODE_OF_CONDUCT.rst + [options] -package_dir= +package_dir = =src -packages=find: +packages = find: include_package_data = true zip_safe = false + +python_requires = >=3.6.* + install_requires = attrs >= 18.1, !=20.1.0 - commoncode >= 21.5.25 + commoncode >= 30.2.0 plugincode >= 21.1.21 typecode >= 21.6.1 - patch >= 1.16 - -setup_requires = setuptools_scm[toml] >= 4 + six [options.packages.find] -where=src +where = src + [options.extras_require] full = @@ -73,15 +86,19 @@ full = extractcode_libarchive >= 3.5.1.210525 typecode[full] >= 21.6.1 +patch = + patch >= 1.16 + testing = - # upstream - pytest >= 6 + pytest >= 6, != 7.0.0 pytest-xdist >= 2 + 
aboutcode-toolkit >= 6.0.0 + black -docs= - Sphinx>=3.3.1 - sphinx-rtd-theme>=0.5.0 - doc8>=0.8.1 +docs = + Sphinx >= 3.3.1 + sphinx-rtd-theme >= 0.5.0 + doc8 >= 0.8.1 [options.entry_points] console-scripts = diff --git a/src/extractcode/archive.py b/src/extractcode/archive.py index 81135b3..d52399e 100644 --- a/src/extractcode/archive.py +++ b/src/extractcode/archive.py @@ -1194,5 +1194,11 @@ def try_to_extract(location, target_dir, extractor): QCOWHandler, VMDKHandler, VirtualBoxHandler, - PatchHandler, ] + +# only support extracting patches if patch is installed. This is not a default +try: + import patch as _pythonpatch + archive_handlers.append(PatchHandler) +except: + pass diff --git a/src/extractcode/patch.py b/src/extractcode/patch.py index 0225e45..3588695 100644 --- a/src/extractcode/patch.py +++ b/src/extractcode/patch.py @@ -11,8 +11,6 @@ import logging import os.path -import patch as pythonpatch - from commoncode import paths from commoncode import fileutils from commoncode import text @@ -136,6 +134,7 @@ def patch_info(location): Raise an exception if the file is not a patch file or cannot be parsed. """ + import patch as pythonpatch patchset = pythonpatch.fromfile(location) if not patchset: msg = 'Unable to parse patch file: %(location)s' % locals() diff --git a/tests/test_patch.py b/tests/test_patch.py index 5a70951..c3f4008 100644 --- a/tests/test_patch.py +++ b/tests/test_patch.py @@ -17,6 +17,12 @@ from extractcode import patch +try: + import patch as _pythonpatch +except ImportError: + import pytest + pytestmark = pytest.mark.skipif(True, reason="Run only if patch is installed.") + class TestIsPatch(FileBasedTesting): test_data_dir = os.path.join(os.path.dirname(__file__), 'data') diff --git a/tests/test_skeleton_codestyle.py b/tests/test_skeleton_codestyle.py new file mode 100644 index 0000000..2eb6e55 --- /dev/null +++ b/tests/test_skeleton_codestyle.py @@ -0,0 +1,36 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. 
+# ScanCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/skeleton for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import subprocess +import unittest +import configparser + + +class BaseTests(unittest.TestCase): + def test_skeleton_codestyle(self): + """ + This test shouldn't run in proliferated repositories. + """ + setup_cfg = configparser.ConfigParser() + setup_cfg.read("setup.cfg") + if setup_cfg["metadata"]["name"] != "skeleton": + return + + args = "venv/bin/black --check -l 100 setup.py etc tests" + try: + subprocess.check_output(args.split()) + except subprocess.CalledProcessError as e: + print("===========================================================") + print(e.output) + print("===========================================================") + raise Exception( + "Black style check failed; please format the code using:\n" + " python -m black -l 100 setup.py etc tests", + e.output, + ) from e