listing URLs to all distribution types and versions). - Repositories and Distributions are related through filenames. + +- while we have package reqs in TODO queue, process one requirement: + - for each PyPI simple index: + - fetch through cache the PyPI simple index for this package + - for each environment: + - find a wheel matching pinned requirement in this index + - if file exist locally, continue + - fetch the wheel for env + - IF pure, break, no more needed for env + - collect requirement deps from wheel metadata and add to queue + - if fetched, break, otherwise display error message - The Wheel models code is partially derived from the mit-licensed pip and the - Distribution/Wheel/Sdist design has been heavily inspired by the packaging- - dists library https://github.com/uranusjr/packaging-dists by Tzu-ping Chung """ TRACE = False +TRACE_DEEP = False +TRACE_ULTRA_DEEP = False # Supported environments -PYTHON_VERSIONS = '36', '37', '38', '39', +PYTHON_VERSIONS = "36", "37", "38", "39", "310" + +PYTHON_DOT_VERSIONS_BY_VER = { + "36": "3.6", + "37": "3.7", + "38": "3.8", + "39": "3.9", + "310": "3.10", +} + + +def get_python_dot_version(version): + """ + Return a dot version from a plain, non-dot version. 
+ """ + return PYTHON_DOT_VERSIONS_BY_VER[version] + ABIS_BY_PYTHON_VERSION = { - '36':['cp36', 'cp36m'], - '37':['cp37', 'cp37m'], - '38':['cp38', 'cp38m'], - '39':['cp39', 'cp39m'], + "36": ["cp36", "cp36m", "abi3"], + "37": ["cp37", "cp37m", "abi3"], + "38": ["cp38", "cp38m", "abi3"], + "39": ["cp39", "cp39m", "abi3"], + "310": ["cp310", "cp310m", "abi3"], } PLATFORMS_BY_OS = { - 'linux': [ - 'linux_x86_64', - 'manylinux1_x86_64', - 'manylinux2014_x86_64', - 'manylinux2010_x86_64', + "linux": [ + "linux_x86_64", + "manylinux1_x86_64", + "manylinux2010_x86_64", + "manylinux2014_x86_64", ], - 'macos': [ - 'macosx_10_6_intel', 'macosx_10_6_x86_64', - 'macosx_10_9_intel', 'macosx_10_9_x86_64', - 'macosx_10_10_intel', 'macosx_10_10_x86_64', - 'macosx_10_11_intel', 'macosx_10_11_x86_64', - 'macosx_10_12_intel', 'macosx_10_12_x86_64', - 'macosx_10_13_intel', 'macosx_10_13_x86_64', - 'macosx_10_14_intel', 'macosx_10_14_x86_64', - 'macosx_10_15_intel', 'macosx_10_15_x86_64', + "macos": [ + "macosx_10_6_intel", + "macosx_10_6_x86_64", + "macosx_10_9_intel", + "macosx_10_9_x86_64", + "macosx_10_10_intel", + "macosx_10_10_x86_64", + "macosx_10_11_intel", + "macosx_10_11_x86_64", + "macosx_10_12_intel", + "macosx_10_12_x86_64", + "macosx_10_13_intel", + "macosx_10_13_x86_64", + "macosx_10_14_intel", + "macosx_10_14_x86_64", + "macosx_10_15_intel", + "macosx_10_15_x86_64", + "macosx_11_0_x86_64", + "macosx_11_intel", + "macosx_11_0_x86_64", + "macosx_11_intel", + "macosx_10_9_universal2", + "macosx_10_10_universal2", + "macosx_10_11_universal2", + "macosx_10_12_universal2", + "macosx_10_13_universal2", + "macosx_10_14_universal2", + "macosx_10_15_universal2", + "macosx_11_0_universal2", + # 'macosx_11_0_arm64', ], - 'windows': [ - 'win_amd64', + "windows": [ + "win_amd64", ], } -THIRDPARTY_DIR = 'thirdparty' -CACHE_THIRDPARTY_DIR = '.cache/thirdparty' +THIRDPARTY_DIR = "thirdparty" +CACHE_THIRDPARTY_DIR = ".cache/thirdparty" + 
+################################################################################ -REMOTE_LINKS_URL = 'https://thirdparty.aboutcode.org/pypi' +ABOUT_BASE_URL = "https://thirdparty.aboutcode.org/pypi" +ABOUT_PYPI_SIMPLE_URL = f"{ABOUT_BASE_URL}/simple" +ABOUT_LINKS_URL = f"{ABOUT_PYPI_SIMPLE_URL}/links.html" +PYPI_SIMPLE_URL = "https://pypi.org/simple" +PYPI_INDEX_URLS = (PYPI_SIMPLE_URL, ABOUT_PYPI_SIMPLE_URL) -EXTENSIONS_APP = '.pyz', -EXTENSIONS_SDIST = '.tar.gz', '.tar.bz2', '.zip', '.tar.xz', -EXTENSIONS_INSTALLABLE = EXTENSIONS_SDIST + ('.whl',) -EXTENSIONS_ABOUT = '.ABOUT', '.LICENSE', '.NOTICE', -EXTENSIONS = EXTENSIONS_INSTALLABLE + EXTENSIONS_ABOUT + EXTENSIONS_APP +################################################################################ -PYPI_SIMPLE_URL = 'https://pypi.org/simple' +EXTENSIONS_APP = (".pyz",) +EXTENSIONS_SDIST = ( + ".tar.gz", + ".zip", + ".tar.xz", +) +EXTENSIONS_INSTALLABLE = EXTENSIONS_SDIST + (".whl",) +EXTENSIONS_ABOUT = ( + ".ABOUT", + ".LICENSE", + ".NOTICE", +) +EXTENSIONS = EXTENSIONS_INSTALLABLE + EXTENSIONS_ABOUT + EXTENSIONS_APP -LICENSEDB_API_URL = 'https://scancode-licensedb.aboutcode.org' +LICENSEDB_API_URL = "https://scancode-licensedb.aboutcode.org" LICENSING = license_expression.Licensing() +collect_urls = re.compile('href="([^"]+)"').findall + ################################################################################ -# -# Fetch remote wheels and sources locally -# +# Fetch wheels and sources locally ################################################################################ -def fetch_wheels( - environment=None, - requirements_file='requirements.txt', - allow_unpinned=False, - dest_dir=THIRDPARTY_DIR, - remote_links_url=REMOTE_LINKS_URL, -): - """ - Download all of the wheel of packages listed in the ``requirements_file`` - requirements file into ``dest_dir`` directory. - - Only get wheels for the ``environment`` Enviromnent constraints. 
If the - provided ``environment`` is None then the current Python interpreter - environment is used implicitly. +class DistributionNotFound(Exception): + pass - Only accept pinned requirements (e.g. with a version) unless - ``allow_unpinned`` is True. - Use exclusively direct downloads from a remote repo at URL - ``remote_links_url``. If ``remote_links_url`` is a path, use this as a - directory of links instead of a URL. +def download_wheel(name, version, environment, dest_dir=THIRDPARTY_DIR, repos=tuple()): + """ + Download the wheels binary distribution(s) of package ``name`` and + ``version`` matching the ``environment`` Environment constraints into the + ``dest_dir`` directory. Return a list of fetched_wheel_filenames, possibly + empty. - Yield tuples of (PypiPackage, error) where is None on success. + Use the first PyPI simple repository from a list of ``repos`` that contains this wheel. """ - missed = [] + if TRACE_DEEP: + print(f" download_wheel: {name}=={version} for envt: {environment}") - if not allow_unpinned: - force_pinned = True - else: - force_pinned = False + if not repos: + repos = DEFAULT_PYPI_REPOS - try: - rrp = list(get_required_remote_packages( - requirements_file=requirements_file, - force_pinned=force_pinned, - remote_links_url=remote_links_url, - )) - except Exception as e: - raise Exception( - dict( - requirements_file=requirements_file, - force_pinned=force_pinned, - remote_links_url=remote_links_url, - ) - ) from e + fetched_wheel_filenames = [] - fetched_filenames = set() - for name, version, package in rrp: + for repo in repos: + package = repo.get_package_version(name=name, version=version) if not package: - missed.append((name, version,)) - nv = f'{name}=={version}' if version else name - yield None, f'fetch_wheels: Missing package in remote repo: {nv}' + if TRACE_DEEP: + print(f" download_wheel: No package in {repo.index_url} for {name}=={version}") + continue + supported_wheels = 
list(package.get_supported_wheels(environment=environment)) + if not supported_wheels: + if TRACE_DEEP: + print( + f" download_wheel: No supported wheel for {name}=={version}: {environment} " + ) + continue - else: - fetched_filename = package.fetch_wheel( - environment=environment, - fetched_filenames=fetched_filenames, - dest_dir=dest_dir, - ) + for wheel in supported_wheels: + if TRACE_DEEP: + print( + f" download_wheel: Getting wheel from index (or cache): {wheel.download_url}" + ) + fetched_wheel_filename = wheel.download(dest_dir=dest_dir) + fetched_wheel_filenames.append(fetched_wheel_filename) - if fetched_filename: - fetched_filenames.add(fetched_filename) - error = None - else: - if fetched_filename in fetched_filenames: - error = None - else: - error = f'Failed to fetch' - yield package, error - - if missed: - rr = get_remote_repo() - print() - print(f'===> fetch_wheels: Missed some packages') - for n, v in missed: - nv = f'{n}=={v}' if v else n - print(f'Missed package {nv} in remote repo, has only:') - for pv in rr.get_versions(n): - print(' ', pv) - raise Exception('Missed some packages in remote repo') - - -def fetch_sources( - requirements_file='requirements.txt', - allow_unpinned=False, - dest_dir=THIRDPARTY_DIR, - remote_links_url=REMOTE_LINKS_URL, -): - """ - Download all of the dependent package sources listed in the - ``requirements_file`` requirements file into ``dest_dir`` destination - directory. + if fetched_wheel_filenames: + # do not futher fetch from other repos if we find in first, typically PyPI + break - Use direct downloads to achieve this (not pip download). Use exclusively the - packages found from a remote repo at URL ``remote_links_url``. If - ``remote_links_url`` is a path, use this as a directory of links instead of - a URL. + return fetched_wheel_filenames - Only accept pinned requirements (e.g. with a version) unless - ``allow_unpinned`` is True. 
- Yield tuples of (PypiPackage, error message) for each package where error - message will empty on success. +def download_sdist(name, version, dest_dir=THIRDPARTY_DIR, repos=tuple()): """ - missed = [] + Download the sdist source distribution of package ``name`` and ``version`` + into the ``dest_dir`` directory. Return a fetched filename or None. - if not allow_unpinned: - force_pinned = True - else: - force_pinned = False + Use the first PyPI simple repository from a list of ``repos`` that contains + this sdist. + """ + if TRACE: + print(f" download_sdist: {name}=={version}") + + if not repos: + repos = DEFAULT_PYPI_REPOS - rrp = list(get_required_remote_packages( - requirements_file=requirements_file, - force_pinned=force_pinned, - remote_links_url=remote_links_url, - )) + fetched_sdist_filename = None + + for repo in repos: + package = repo.get_package_version(name=name, version=version) - for name, version, package in rrp: if not package: - missed.append((name, name,)) - nv = f'{name}=={version}' if version else name - yield None, f'fetch_sources: Missing package in remote repo: {nv}' + if TRACE_DEEP: + print(f" download_sdist: No package in {repo.index_url} for {name}=={version}") + continue + sdist = package.sdist + if not sdist: + if TRACE_DEEP: + print(f" download_sdist: No sdist for {name}=={version}") + continue - elif not package.sdist: - yield package, f'Missing sdist in links' + if TRACE_DEEP: + print(f" download_sdist: Getting sdist from index (or cache): {sdist.download_url}") + fetched_sdist_filename = package.sdist.download(dest_dir=dest_dir) - else: - fetched = package.fetch_sdist(dest_dir=dest_dir) - error = f'Failed to fetch' if not fetched else None - yield package, error - if missed: - raise Exception(f'Missing source packages in {remote_links_url}', missed) + if fetched_sdist_filename: + # do not futher fetch from other repos if we find in first, typically PyPI + break + + return fetched_sdist_filename 
################################################################################ # @@ -286,12 +322,12 @@ def fetch_sources( class NameVer: name = attr.ib( type=str, - metadata=dict(help='Python package name, lowercase and normalized.'), + metadata=dict(help="Python package name, lowercase and normalized."), ) version = attr.ib( type=str, - metadata=dict(help='Python package version string.'), + metadata=dict(help="Python package version string."), ) @property @@ -306,17 +342,6 @@ def normalize_name(name): """ return name and re.sub(r"[-_.]+", "-", name).lower() or name - @staticmethod - def standardize_name(name): - """ - Return a standardized package name, e.g. lowercased and using - not _ - """ - return name and re.sub(r"[-_]+", "-", name).lower() or name - - @property - def name_ver(self): - return f'{self.name}-{self.version}' - def sortable_name_version(self): """ Return a tuple of values to sort by name, then version. @@ -326,154 +351,154 @@ def sortable_name_version(self): @classmethod def sorted(cls, namevers): - return sorted(namevers, key=cls.sortable_name_version) + return sorted(namevers or [], key=cls.sortable_name_version) @attr.attributes class Distribution(NameVer): - # field names that can be updated from another dist of mapping + # field names that can be updated from another Distribution or mapping updatable_fields = [ - 'license_expression', - 'copyright', - 'description', - 'homepage_url', - 'primary_language', - 'notice_text', - 'extra_data', + "license_expression", + "copyright", + "description", + "homepage_url", + "primary_language", + "notice_text", + "extra_data", ] filename = attr.ib( repr=False, type=str, - default='', - metadata=dict(help='File name.'), + default="", + metadata=dict(help="File name."), ) path_or_url = attr.ib( repr=False, type=str, - default='', - metadata=dict(help='Path or download URL.'), + default="", + metadata=dict(help="Path or URL"), ) sha256 = attr.ib( repr=False, type=str, - default='', - 
metadata=dict(help='SHA256 checksum.'), + default="", + metadata=dict(help="SHA256 checksum."), ) sha1 = attr.ib( repr=False, type=str, - default='', - metadata=dict(help='SHA1 checksum.'), + default="", + metadata=dict(help="SHA1 checksum."), ) md5 = attr.ib( repr=False, type=int, default=0, - metadata=dict(help='MD5 checksum.'), + metadata=dict(help="MD5 checksum."), ) type = attr.ib( repr=False, type=str, - default='pypi', - metadata=dict(help='Package type'), + default="pypi", + metadata=dict(help="Package type"), ) namespace = attr.ib( repr=False, type=str, - default='', - metadata=dict(help='Package URL namespace'), + default="", + metadata=dict(help="Package URL namespace"), ) qualifiers = attr.ib( repr=False, type=dict, default=attr.Factory(dict), - metadata=dict(help='Package URL qualifiers'), + metadata=dict(help="Package URL qualifiers"), ) subpath = attr.ib( repr=False, type=str, - default='', - metadata=dict(help='Package URL subpath'), + default="", + metadata=dict(help="Package URL subpath"), ) size = attr.ib( repr=False, type=str, - default='', - metadata=dict(help='Size in bytes.'), + default="", + metadata=dict(help="Size in bytes."), ) primary_language = attr.ib( repr=False, type=str, - default='Python', - metadata=dict(help='Primary Programming language.'), + default="Python", + metadata=dict(help="Primary Programming language."), ) description = attr.ib( repr=False, type=str, - default='', - metadata=dict(help='Description.'), + default="", + metadata=dict(help="Description."), ) homepage_url = attr.ib( repr=False, type=str, - default='', - metadata=dict(help='Homepage URL'), + default="", + metadata=dict(help="Homepage URL"), ) notes = attr.ib( repr=False, type=str, - default='', - metadata=dict(help='Notes.'), + default="", + metadata=dict(help="Notes."), ) copyright = attr.ib( repr=False, type=str, - default='', - metadata=dict(help='Copyright.'), + default="", + metadata=dict(help="Copyright."), ) license_expression = attr.ib( repr=False, 
type=str, - default='', - metadata=dict(help='License expression'), + default="", + metadata=dict(help="License expression"), ) licenses = attr.ib( repr=False, type=list, default=attr.Factory(list), - metadata=dict(help='List of license mappings.'), + metadata=dict(help="List of license mappings."), ) notice_text = attr.ib( repr=False, type=str, - default='', - metadata=dict(help='Notice text'), + default="", + metadata=dict(help="Notice text"), ) extra_data = attr.ib( repr=False, type=dict, default=attr.Factory(dict), - metadata=dict(help='Extra data'), + metadata=dict(help="Extra data"), ) @property @@ -481,51 +506,110 @@ def package_url(self): """ Return a Package URL string of self. """ - return str(packageurl.PackageURL(**self.purl_identifiers())) + return str( + packageurl.PackageURL( + type=self.type, + namespace=self.namespace, + name=self.name, + version=self.version, + subpath=self.subpath, + qualifiers=self.qualifiers, + ) + ) @property def download_url(self): - if self.path_or_url and self.path_or_url.startswith('https://'): - return self.path_or_url - else: - return self.get_best_download_url() + return self.get_best_download_url() + + def get_best_download_url(self, repos=tuple()): + """ + Return the best download URL for this distribution where best means this + is the first URL found for this distribution found in the list of + ``repos``. + + If none is found, return a synthetic PyPI remote URL. 
+ """ + + if not repos: + repos = DEFAULT_PYPI_REPOS + + for repo in repos: + package = repo.get_package_version(name=self.name, version=self.version) + if not package: + if TRACE: + print( + f" get_best_download_url: {self.name}=={self.version} " + f"not found in {repo.index_url}" + ) + continue + pypi_url = package.get_url_for_filename(self.filename) + if pypi_url: + return pypi_url + else: + if TRACE: + print( + f" get_best_download_url: {self.filename} not found in {repo.index_url}" + ) + + def download(self, dest_dir=THIRDPARTY_DIR): + """ + Download this distribution into `dest_dir` directory. + Return the fetched filename. + """ + assert self.filename + if TRACE_DEEP: + print( + f"Fetching distribution of {self.name}=={self.version}:", + self.filename, + ) + + # FIXME: + fetch_and_save( + path_or_url=self.path_or_url, + dest_dir=dest_dir, + filename=self.filename, + as_text=False, + ) + return self.filename @property def about_filename(self): - return f'{self.filename}.ABOUT' - - def has_about_file(self, dest_dir=THIRDPARTY_DIR): - return os.path.exists(os.path.join(dest_dir, self.about_filename)) + return f"{self.filename}.ABOUT" @property def about_download_url(self): - return self.build_remote_download_url(self.about_filename) + return f"{ABOUT_BASE_URL}/{self.about_filename}" @property def notice_filename(self): - return f'{self.filename}.NOTICE' + return f"{self.filename}.NOTICE" @property def notice_download_url(self): - return self.build_remote_download_url(self.notice_filename) + return f"{ABOUT_BASE_URL}/{self.notice_filename}" @classmethod def from_path_or_url(cls, path_or_url): """ Return a distribution built from the data found in the filename of a - `path_or_url` string. Raise an exception if this is not a valid + ``path_or_url`` string. Raise an exception if this is not a valid filename. 
""" - filename = os.path.basename(path_or_url.strip('/')) + filename = os.path.basename(path_or_url.strip("/")) dist = cls.from_filename(filename) dist.path_or_url = path_or_url return dist @classmethod def get_dist_class(cls, filename): - if filename.endswith('.whl'): + if filename.endswith(".whl"): return Wheel - elif filename.endswith(('.zip', '.tar.gz',)): + elif filename.endswith( + ( + ".zip", + ".tar.gz", + ) + ): return Sdist raise InvalidDistributionFilename(filename) @@ -535,123 +619,15 @@ def from_filename(cls, filename): Return a distribution built from the data found in a `filename` string. Raise an exception if this is not a valid filename """ + filename = os.path.basename(filename.strip("/")) clazz = cls.get_dist_class(filename) return clazz.from_filename(filename) - @classmethod - def from_data(cls, data, keep_extra=False): - """ - Return a distribution built from a `data` mapping. - """ - filename = data['filename'] - dist = cls.from_filename(filename) - dist.update(data, keep_extra=keep_extra) - return dist - - @classmethod - def from_dist(cls, data, dist): - """ - Return a distribution built from a `data` mapping and update it with data - from another dist Distribution. 
Return None if it cannot be created - """ - # We can only create from a dist of the same package - has_same_key_fields = all(data.get(kf) == getattr(dist, kf, None) - for kf in ('type', 'namespace', 'name') - ) - if not has_same_key_fields: - print(f'Missing key fields: Cannot derive a new dist from data: {data} and dist: {dist}') - return - - has_key_field_values = all(data.get(kf) for kf in ('type', 'name', 'version')) - if not has_key_field_values: - print(f'Missing key field values: Cannot derive a new dist from data: {data} and dist: {dist}') - return - - data = dict(data) - # do not overwrite the data with the other dist - # only supplement - data.update({k: v for k, v in dist.get_updatable_data().items() if not data.get(k)}) - return cls.from_data(data) - - @classmethod - def build_remote_download_url(cls, filename, base_url=REMOTE_LINKS_URL): - """ - Return a direct download URL for a file in our remote repo - """ - return f'{base_url}/{filename}' - - def get_best_download_url(self): - """ - Return the best download URL for this distribution where best means that - PyPI is better and our own remote repo URLs are second. - If none is found, return a synthetic remote URL. - """ - name = self.normalized_name - version = self.version - filename = self.filename - - pypi_package = get_pypi_package(name=name, version=version) - if pypi_package: - pypi_url = pypi_package.get_url_for_filename(filename) - if pypi_url: - return pypi_url - - remote_package = get_remote_package(name=name, version=version) - if remote_package: - remote_url = remote_package.get_url_for_filename(filename) - if remote_url: - return remote_url - else: - # the package may not have been published yet, so we craft a URL - # using our remote base URL - return self.build_remote_download_url(self.filename) - - def purl_identifiers(self, skinny=False): - """ - Return a mapping of non-empty identifier name/values for the purl - fields. If skinny is True, only inlucde type, namespace and name. 
- """ - identifiers = dict( - type=self.type, - namespace=self.namespace, - name=self.name, - ) - - if not skinny: - identifiers.update( - version=self.version, - subpath=self.subpath, - qualifiers=self.qualifiers, - ) - - return {k: v for k, v in sorted(identifiers.items()) if v} - - def identifiers(self, purl_as_fields=True): - """ - Return a mapping of non-empty identifier name/values. - Return each purl fields separately if purl_as_fields is True. - Otherwise return a package_url string for the purl. - """ - if purl_as_fields: - identifiers = self.purl_identifiers() - else: - identifiers = dict(package_url=self.package_url) - - identifiers.update( - download_url=self.download_url, - filename=self.filename, - md5=self.md5, - sha1=self.sha1, - package_url=self.package_url, - ) - - return {k: v for k, v in sorted(identifiers.items()) if v} - def has_key_metadata(self): """ Return True if this distribution has key metadata required for basic attribution. """ - if self.license_expression == 'public-domain': + if self.license_expression == "public-domain": # copyright not needed return True return self.license_expression and self.copyright and self.path_or_url @@ -672,7 +648,7 @@ def to_about(self): name=self.name, namespace=self.namespace, notes=self.notes, - notice_file=self.notice_filename if self.notice_text else '', + notice_file=self.notice_filename if self.notice_text else "", package_url=self.package_url, primary_language=self.primary_language, qualifiers=self.qualifiers, @@ -690,7 +666,7 @@ def to_dict(self): """ Return a mapping data from this distribution. 
""" - return {k: v for k, v in attr.asdict(self).items() if v} + return {k: v for k, v in attr.asdict(self).items() if v} def save_about_and_notice_files(self, dest_dir=THIRDPARTY_DIR): """ @@ -705,14 +681,17 @@ def save_if_modified(location, content): if existing_content == content: return False - if TRACE: print(f'Saving ABOUT (and NOTICE) files for: {self}') - with open(location, 'w') as fo: + if TRACE: + print(f"Saving ABOUT (and NOTICE) files for: {self}") + with open(location, "w") as fo: fo.write(content) return True + as_about = self.to_about() + save_if_modified( location=os.path.join(dest_dir, self.about_filename), - content=saneyaml.dump(self.to_about()), + content=saneyaml.dump(as_about), ) notice_text = self.notice_text and self.notice_text.strip() @@ -745,26 +724,26 @@ def load_about_data(self, about_filename_or_data=None, dest_dir=THIRDPARTY_DIR): else: about_data = about_filename_or_data - md5 = about_data.pop('checksum_md5', None) + md5 = about_data.pop("checksum_md5", None) if md5: - about_data['md5'] = md5 - sha1 = about_data.pop('checksum_sha1', None) + about_data["md5"] = md5 + sha1 = about_data.pop("checksum_sha1", None) if sha1: - about_data['sha1'] = sha1 - sha256 = about_data.pop('checksum_sha256', None) + about_data["sha1"] = sha1 + sha256 = about_data.pop("checksum_sha256", None) if sha256: - about_data['sha256'] = sha256 + about_data["sha256"] = sha256 - about_data.pop('about_resource', None) - notice_text = about_data.pop('notice_text', None) - notice_file = about_data.pop('notice_file', None) + about_data.pop("about_resource", None) + notice_text = about_data.pop("notice_text", None) + notice_file = about_data.pop("notice_file", None) if notice_text: - about_data['notice_text'] = notice_text + about_data["notice_text"] = notice_text elif notice_file: notice_loc = os.path.join(dest_dir, notice_file) if os.path.exists(notice_loc): with open(notice_loc) as fi: - about_data['notice_text'] = fi.read() + about_data["notice_text"] = fi.read() 
return self.update(about_data, keep_extra=True) def load_remote_about_data(self): @@ -773,7 +752,10 @@ def load_remote_about_data(self): NOTICE file if any. Return True if the data was updated. """ try: - about_text = fetch_content_from_path_or_url_through_cache(self.about_download_url) + about_text = CACHE.get( + path_or_url=self.about_download_url, + as_text=True, + ) except RemoteNotFetchedException: return False @@ -781,14 +763,17 @@ def load_remote_about_data(self): return False about_data = saneyaml.load(about_text) - notice_file = about_data.pop('notice_file', None) + notice_file = about_data.pop("notice_file", None) if notice_file: try: - notice_text = fetch_content_from_path_or_url_through_cache(self.notice_download_url) + notice_text = CACHE.get( + path_or_url=self.notice_download_url, + as_text=True, + ) if notice_text: - about_data['notice_text'] = notice_text + about_data["notice_text"] = notice_text except RemoteNotFetchedException: - print(f'Failed to fetch NOTICE file: {self.notice_download_url}') + print(f"Failed to fetch NOTICE file: {self.notice_download_url}") return self.load_about_data(about_data) def get_checksums(self, dest_dir=THIRDPARTY_DIR): @@ -798,7 +783,7 @@ def get_checksums(self, dest_dir=THIRDPARTY_DIR): """ dist_loc = os.path.join(dest_dir, self.filename) if os.path.exists(dist_loc): - return multi_checksums(dist_loc, checksum_names=('md5', 'sha1', 'sha256')) + return multi_checksums(dist_loc, checksum_names=("md5", "sha1", "sha256")) else: return {} @@ -814,67 +799,65 @@ def validate_checksums(self, dest_dir=THIRDPARTY_DIR): checksums computed for this dist filename is `dest_dir`. """ real_checksums = self.get_checksums(dest_dir) - for csk in ('md5', 'sha1', 'sha256'): + for csk in ("md5", "sha1", "sha256"): csv = getattr(self, csk) rcv = real_checksums.get(csk) if csv and rcv and csv != rcv: return False return True - def get_pip_hash(self): - """ - Return a pip hash option string as used in requirements for this dist. 
- """ - assert self.sha256, f'Missinh SHA256 for dist {self}' - return f'--hash=sha256:{self.sha256}' - def get_license_keys(self): try: - keys = LICENSING.license_keys(self.license_expression, unique=True, simple=True) + keys = LICENSING.license_keys( + self.license_expression, + unique=True, + simple=True, + ) except license_expression.ExpressionParseError: - return ['unknown'] + return ["unknown"] return keys - def fetch_license_files(self, dest_dir=THIRDPARTY_DIR): + def fetch_license_files(self, dest_dir=THIRDPARTY_DIR, use_cached_index=False): """ - Fetch license files is missing in `dest_dir`. + Fetch license files if missing in `dest_dir`. Return True if license files were fetched. """ - paths_or_urls = get_remote_repo().links + urls = LinksRepository.from_url(use_cached_index=use_cached_index).links errors = [] - extra_lic_names = [l.get('file') for l in self.extra_data.get('licenses', {})] - extra_lic_names += [self.extra_data.get('license_file')] - extra_lic_names = [ln for ln in extra_lic_names if ln] - lic_names = [ f'{key}.LICENSE' for key in self.get_license_keys()] - for filename in lic_names + extra_lic_names: + extra_lic_names = [l.get("file") for l in self.extra_data.get("licenses", {})] + extra_lic_names += [self.extra_data.get("license_file")] + extra_lic_names = [ln for ln in extra_lic_names if ln] + lic_names = [f"{key}.LICENSE" for key in self.get_license_keys()] + for filename in lic_names + extra_lic_names: floc = os.path.join(dest_dir, filename) if os.path.exists(floc): continue try: # try remotely first - lic_url = get_link_for_filename( - filename=filename, paths_or_urls=paths_or_urls) + lic_url = get_license_link_for_filename(filename=filename, urls=urls) - fetch_and_save_path_or_url( - filename=filename, - dest_dir=dest_dir, + fetch_and_save( path_or_url=lic_url, + dest_dir=dest_dir, + filename=filename, as_text=True, ) - if TRACE: print(f'Fetched license from remote: {lic_url}') + if TRACE: + print(f"Fetched license from remote: 
{lic_url}") except: try: # try licensedb second - lic_url = f'{LICENSEDB_API_URL}/{filename}' - fetch_and_save_path_or_url( - filename=filename, - dest_dir=dest_dir, + lic_url = f"{LICENSEDB_API_URL}/{filename}" + fetch_and_save( path_or_url=lic_url, + dest_dir=dest_dir, + filename=filename, as_text=True, ) - if TRACE: print(f'Fetched license from licensedb: {lic_url}') + if TRACE: + print(f"Fetched license from licensedb: {lic_url}") except: msg = f'No text for license {filename} in expression "{self.license_expression}" from {self}' @@ -888,14 +871,27 @@ def extract_pkginfo(self, dest_dir=THIRDPARTY_DIR): Return the text of the first PKG-INFO or METADATA file found in the archive of this Distribution in `dest_dir`. Return None if not found. """ - fmt = 'zip' if self.filename.endswith('.whl') else None - dist = os.path.join(dest_dir, self.filename) - with tempfile.TemporaryDirectory(prefix='pypi-tmp-extract') as td: + + fn = self.filename + if fn.endswith(".whl"): + fmt = "zip" + elif fn.endswith(".tar.gz"): + fmt = "gztar" + else: + fmt = None + + dist = os.path.join(dest_dir, fn) + with tempfile.TemporaryDirectory(prefix=f"pypi-tmp-extract-{fn}") as td: shutil.unpack_archive(filename=dist, extract_dir=td, format=fmt) # NOTE: we only care about the first one found in the dist # which may not be 100% right for pi in fileutils.resource_iter(location=td, with_dirs=False): - if pi.endswith(('PKG-INFO', 'METADATA',)): + if pi.endswith( + ( + "PKG-INFO", + "METADATA", + ) + ): with open(pi) as fi: return fi.read() @@ -906,31 +902,33 @@ def load_pkginfo_data(self, dest_dir=THIRDPARTY_DIR): """ pkginfo_text = self.extract_pkginfo(dest_dir=dest_dir) if not pkginfo_text: - print(f'!!!!PKG-INFO not found in {self.filename}') + print(f"!!!!PKG-INFO/METADATA not found in {self.filename}") return raw_data = email.message_from_string(pkginfo_text) - classifiers = raw_data.get_all('Classifier') or [] + classifiers = raw_data.get_all("Classifier") or [] - declared_license = 
[raw_data['License']] + [c for c in classifiers if c.startswith('License')] + declared_license = [raw_data["License"]] + [ + c for c in classifiers if c.startswith("License") + ] license_expression = compute_normalized_license_expression(declared_license) - other_classifiers = [c for c in classifiers if not c.startswith('License')] + other_classifiers = [c for c in classifiers if not c.startswith("License")] - holder = raw_data['Author'] - holder_contact = raw_data['Author-email'] - copyright_statement = f'Copyright (c) {holder} <{holder_contact}>' + holder = raw_data["Author"] + holder_contact = raw_data["Author-email"] + copyright_statement = f"Copyright (c) {holder} <{holder_contact}>" pkginfo_data = dict( - name=raw_data['Name'], + name=raw_data["Name"], declared_license=declared_license, - version=raw_data['Version'], - description=raw_data['Summary'], - homepage_url=raw_data['Home-page'], + version=raw_data["Version"], + description=raw_data["Summary"], + homepage_url=raw_data["Home-page"], copyright=copyright_statement, license_expression=license_expression, holder=holder, holder_contact=holder_contact, - keywords=raw_data['Keywords'], + keywords=raw_data["Keywords"], classifiers=other_classifiers, ) @@ -944,10 +942,7 @@ def update_from_other_dist(self, dist): def get_updatable_data(self, data=None): data = data or self.to_dict() - return { - k: v for k, v in data.items() - if v and k in self.updatable_fields - } + return {k: v for k, v in data.items() if v and k in self.updatable_fields} def update(self, data, overwrite=False, keep_extra=True): """ @@ -956,20 +951,21 @@ def update(self, data, overwrite=False, keep_extra=True): Return True if any data was updated, False otherwise. Raise an exception if there are key data conflicts. 
""" - package_url = data.get('package_url') + package_url = data.get("package_url") if package_url: purl_from_data = packageurl.PackageURL.from_string(package_url) purl_from_self = packageurl.PackageURL.from_string(self.package_url) if purl_from_data != purl_from_self: print( - f'Invalid dist update attempt, no same same purl with dist: ' - f'{self} using data {data}.') + f"Invalid dist update attempt, no same same purl with dist: " + f"{self} using data {data}." + ) return - data.pop('about_resource', None) - dl = data.pop('download_url', None) + data.pop("about_resource", None) + dl = data.pop("download_url", None) if dl: - data['path_or_url'] = dl + data["path_or_url"] = dl updated = False extra = {} @@ -985,7 +981,7 @@ def update(self, data, overwrite=False, keep_extra=True): try: setattr(self, k, v) except Exception as e: - raise Exception(f'{self}, {k}, {v}') from e + raise Exception(f"{self}, {k}, {v}") from e updated = True elif keep_extra: @@ -998,18 +994,110 @@ def update(self, data, overwrite=False, keep_extra=True): return updated +def get_license_link_for_filename(filename, urls): + """ + Return a link for `filename` found in the `links` list of URLs or paths. Raise an + exception if no link is found or if there are more than one link for that + file name. + """ + path_or_url = [l for l in urls if l.endswith(f"/{filename}")] + if not path_or_url: + raise Exception(f"Missing link to file: {filename}") + if not len(path_or_url) == 1: + raise Exception(f"Multiple links to file: {filename}: \n" + "\n".join(path_or_url)) + return path_or_url[0] + + class InvalidDistributionFilename(Exception): pass +def get_sdist_name_ver_ext(filename): + """ + Return a (name, version, extension) if filename is a valid sdist name. Some legacy + binary builds have weird names. Return False otherwise. 
+ + In particular they do not use PEP440 compliant versions and/or mix tags, os + and arch names in tarball names and versions: + + >>> assert get_sdist_name_ver_ext("intbitset-1.3.tar.gz") + >>> assert not get_sdist_name_ver_ext("intbitset-1.3.linux-x86_64.tar.gz") + >>> assert get_sdist_name_ver_ext("intbitset-1.4a.tar.gz") + >>> assert get_sdist_name_ver_ext("intbitset-1.4a.zip") + >>> assert not get_sdist_name_ver_ext("intbitset-2.0.linux-x86_64.tar.gz") + >>> assert get_sdist_name_ver_ext("intbitset-2.0.tar.gz") + >>> assert not get_sdist_name_ver_ext("intbitset-2.1-1.src.rpm") + >>> assert not get_sdist_name_ver_ext("intbitset-2.1-1.x86_64.rpm") + >>> assert not get_sdist_name_ver_ext("intbitset-2.1.linux-x86_64.tar.gz") + >>> assert not get_sdist_name_ver_ext("cffi-1.2.0-1.tar.gz") + >>> assert not get_sdist_name_ver_ext("html5lib-1.0-reupload.tar.gz") + >>> assert not get_sdist_name_ver_ext("selenium-2.0-dev-9429.tar.gz") + >>> assert not get_sdist_name_ver_ext("testfixtures-1.8.0dev-r4464.tar.gz") + """ + name_ver = None + extension = None + + for ext in EXTENSIONS_SDIST: + if filename.endswith(ext): + name_ver, extension, _ = filename.rpartition(ext) + break + + if not extension or not name_ver: + return False + + name, _, version = name_ver.rpartition("-") + + if not name or not version: + return False + + # weird version + if any( + w in version + for w in ( + "x86_64", + "i386", + ) + ): + return False + + # all char versions + if version.isalpha(): + return False + + # non-pep 440 version + if "-" in version: + return False + + # single version + if version.isdigit() and len(version) == 1: + return False + + # r1 version + if len(version) == 2 and version[0]=="r" and version[1].isdigit(): + return False + + # dotless version (but calver is OK) + if "." not in version and len(version) < 3: + return False + + # version with dashes selenium-2.0-dev-9429.tar.gz + if name.endswith(("dev",)) and "." 
not in version: + return False + # version pre or post, old legacy + if version.startswith(("beta", "rc", "pre", "post", "final")): + return False + + return name, version, extension + + @attr.attributes class Sdist(Distribution): extension = attr.ib( repr=False, type=str, - default='', - metadata=dict(help='File extension, including leading dot.'), + default="", + metadata=dict(help="File extension, including leading dot."), ) @classmethod @@ -1018,24 +1106,14 @@ def from_filename(cls, filename): Return a Sdist object built from a filename. Raise an exception if this is not a valid sdist filename """ - name_ver = None - extension = None - - for ext in EXTENSIONS_SDIST: - if filename.endswith(ext): - name_ver, extension, _ = filename.rpartition(ext) - break - - if not extension or not name_ver: + name_ver_ext = get_sdist_name_ver_ext(filename) + if not name_ver_ext: raise InvalidDistributionFilename(filename) - name, _, version = name_ver.rpartition('-') - - if not name or not version: - raise InvalidDistributionFilename(filename) + name, version, extension = name_ver_ext return cls( - type='pypi', + type="pypi", name=name, version=version, extension=extension, @@ -1047,7 +1125,7 @@ def to_filename(self): Return an sdist filename reconstructed from its fields (that may not be the same as the original filename.) """ - return f'{self.name}-{self.version}.{self.extension}' + return f"{self.name}-{self.version}.{self.extension}" @attr.attributes @@ -1092,38 +1170,38 @@ class Wheel(Distribution): r"""^(?P(?P.+?)-(?P.*?)) ((-(?P\d[^-]*?))?-(?P.+?)-(?P.+?)-(?P.+?) 
\.whl)$""", - re.VERBOSE + re.VERBOSE, ).match build = attr.ib( type=str, - default='', - metadata=dict(help='Python wheel build.'), + default="", + metadata=dict(help="Python wheel build."), ) python_versions = attr.ib( type=list, default=attr.Factory(list), - metadata=dict(help='List of wheel Python version tags.'), + metadata=dict(help="List of wheel Python version tags."), ) abis = attr.ib( type=list, default=attr.Factory(list), - metadata=dict(help='List of wheel ABI tags.'), + metadata=dict(help="List of wheel ABI tags."), ) platforms = attr.ib( type=list, default=attr.Factory(list), - metadata=dict(help='List of wheel platform tags.'), + metadata=dict(help="List of wheel platform tags."), ) tags = attr.ib( repr=False, type=set, default=attr.Factory(set), - metadata=dict(help='Set of all tags for this wheel.'), + metadata=dict(help="Set of all tags for this wheel."), ) @classmethod @@ -1136,24 +1214,23 @@ def from_filename(cls, filename): if not wheel_info: raise InvalidDistributionFilename(filename) - name = wheel_info.group('name').replace('_', '-') + name = wheel_info.group("name").replace("_", "-") # we'll assume "_" means "-" due to wheel naming scheme # (https://github.com/pypa/pip/issues/1150) - version = wheel_info.group('ver').replace('_', '-') - build = wheel_info.group('build') - python_versions = wheel_info.group('pyvers').split('.') - abis = wheel_info.group('abis').split('.') - platforms = wheel_info.group('plats').split('.') + version = wheel_info.group("ver").replace("_", "-") + build = wheel_info.group("build") + python_versions = wheel_info.group("pyvers").split(".") + abis = wheel_info.group("abis").split(".") + platforms = wheel_info.group("plats").split(".") # All the tag combinations from this file tags = { - packaging_tags.Tag(x, y, z) for x in python_versions - for y in abis for z in platforms + packaging_tags.Tag(x, y, z) for x in python_versions for y in abis for z in platforms } return cls( filename=filename, - type='pypi', + 
type="pypi", name=name, version=version, build=build, @@ -1167,25 +1244,22 @@ def is_supported_by_tags(self, tags): """ Return True is this wheel is compatible with one of a list of PEP 425 tags. """ + if TRACE_DEEP: + print() + print("is_supported_by_tags: tags:", tags) + print("self.tags:", self.tags) return not self.tags.isdisjoint(tags) - def is_supported_by_environment(self, environment): - """ - Return True if this wheel is compatible with the Environment - `environment`. - """ - return not self.is_supported_by_tags(environment.tags) - def to_filename(self): """ Return a wheel filename reconstructed from its fields (that may not be the same as the original filename.) """ - build = f'-{self.build}' if self.build else '' - pyvers = '.'.join(self.python_versions) - abis = '.'.join(self.abis) - plats = '.'.join(self.platforms) - return f'{self.name}-{self.version}{build}-{pyvers}-{abis}-{plats}.whl' + build = f"-{self.build}" if self.build else "" + pyvers = ".".join(self.python_versions) + abis = ".".join(self.abis) + plats = ".".join(self.platforms) + return f"{self.name}-{self.version}{build}-{pyvers}-{abis}-{plats}.whl" def is_pure(self): """ @@ -1211,11 +1285,7 @@ def is_pure(self): >>> Wheel.from_filename('future-0.16.0-py3-cp36m-any.whl').is_pure() False """ - return ( - 'py3' in self.python_versions - and 'none' in self.abis - and 'any' in self.platforms - ) + return "py3" in self.python_versions and "none" in self.abis and "any" in self.platforms def is_pure_wheel(filename): @@ -1228,49 +1298,32 @@ def is_pure_wheel(filename): @attr.attributes class PypiPackage(NameVer): """ - A Python package with its "distributions", e.g. wheels and source - distribution , ABOUT files and licenses or notices. + A Python package contains one or more wheels and one source distribution + from a repository. 
""" + sdist = attr.ib( repr=False, - type=str, - default='', - metadata=dict(help='Sdist source distribution for this package.'), + type=Sdist, + default=None, + metadata=dict(help="Sdist source distribution for this package."), ) wheels = attr.ib( repr=False, type=list, default=attr.Factory(list), - metadata=dict(help='List of Wheel for this package'), + metadata=dict(help="List of Wheel for this package"), ) - @property - def specifier(self): - """ - A requirement specifier for this package - """ - if self.version: - return f'{self.name}=={self.version}' - else: - return self.name - - @property - def specifier_with_hashes(self): - """ - Return a requirement specifier for this package with --hash options for - all its distributions - """ - items = [self.specifier] - items += [d.get_pip_hashes() for d in self.get_distributions()] - return ' \\\n '.join(items) - - def get_supported_wheels(self, environment): + def get_supported_wheels(self, environment, verbose=TRACE_ULTRA_DEEP): """ Yield all the Wheel of this package supported and compatible with the Environment `environment`. 
""" envt_tags = environment.tags() + if verbose: + print("get_supported_wheels: envt_tags:", envt_tags) for wheel in self.wheels: if wheel.is_supported_by_tags(envt_tags): yield wheel @@ -1296,6 +1349,8 @@ def package_from_dists(cls, dists): >>> assert package.wheels == [w1, w2] """ dists = list(dists) + if TRACE_DEEP: + print(f"package_from_dists: {dists}") if not dists: return @@ -1306,13 +1361,21 @@ def package_from_dists(cls, dists): package = PypiPackage(name=normalized_name, version=version) for dist in dists: - if dist.normalized_name != normalized_name or dist.version != version: + if dist.normalized_name != normalized_name: if TRACE: print( - f' Skipping inconsistent dist name and version: {dist} ' - f'Expected instead package name: {normalized_name} and version: "{version}"' + f" Skipping inconsistent dist name: expected {normalized_name} got {dist}" ) continue + elif dist.version != version: + dv = packaging_version.parse(dist.version) + v = packaging_version.parse(version) + if dv != v: + if TRACE: + print( + f" Skipping inconsistent dist version: expected {version} got {dist}" + ) + continue if isinstance(dist, Sdist): package.sdist = dist @@ -1321,210 +1384,102 @@ def package_from_dists(cls, dists): package.wheels.append(dist) else: - raise Exception(f'Unknown distribution type: {dist}') + raise Exception(f"Unknown distribution type: {dist}") + + if TRACE_DEEP: + print(f"package_from_dists: {package}") return package @classmethod - def packages_from_one_path_or_url(cls, path_or_url): + def packages_from_dir(cls, directory): """ - Yield PypiPackages built from files found in at directory path or the - URL to an HTML page (that will be fetched). + Yield PypiPackages built from files found in a directory path.
""" - extracted_paths_or_urls = get_paths_or_urls(path_or_url) - return cls.packages_from_many_paths_or_urls(extracted_paths_or_urls) + base = os.path.abspath(directory) + + paths = [os.path.join(base, f) for f in os.listdir(base) if f.endswith(EXTENSIONS)] + + if TRACE_ULTRA_DEEP: + print("packages_from_dir: paths:", paths) + return PypiPackage.packages_from_many_paths_or_urls(paths) @classmethod def packages_from_many_paths_or_urls(cls, paths_or_urls): """ Yield PypiPackages built from a list of paths or URLs. + These are sorted by name and then by version from oldest to newest. """ - dists = cls.get_dists(paths_or_urls) + dists = PypiPackage.dists_from_paths_or_urls(paths_or_urls) + if TRACE_ULTRA_DEEP: + print("packages_from_many_paths_or_urls: dists:", dists) + dists = NameVer.sorted(dists) for _projver, dists_of_package in itertools.groupby( - dists, key=NameVer.sortable_name_version, + dists, + key=NameVer.sortable_name_version, ): - yield PypiPackage.package_from_dists(dists_of_package) + package = PypiPackage.package_from_dists(dists_of_package) + if TRACE_ULTRA_DEEP: + print("packages_from_many_paths_or_urls", package) + yield package @classmethod - def get_versions_from_path_or_url(cls, name, path_or_url): + def dists_from_paths_or_urls(cls, paths_or_urls): """ - Return a subset list from a list of PypiPackages version at `path_or_url` - that match PypiPackage `name`. - """ - packages = cls.packages_from_one_path_or_url(path_or_url) - return cls.get_versions(name, packages) + Return a list of Distribution given a list of + ``paths_or_urls`` to wheels or source distributions. - @classmethod - def get_versions(cls, name, packages): - """ - Return a subset list of package versions from a list of `packages` that - match PypiPackage `name`. - The list is sorted by version from oldest to most recent. 
- """ - norm_name = NameVer.normalize_name(name) - versions = [p for p in packages if p.normalized_name == norm_name] - return cls.sorted(versions) - - @classmethod - def get_latest_version(cls, name, packages): - """ - Return the latest version of PypiPackage `name` from a list of `packages`. - """ - versions = cls.get_versions(name, packages) - if not versions: - return - return versions[-1] - - @classmethod - def get_outdated_versions(cls, name, packages): - """ - Return all versions except the latest version of PypiPackage `name` from a - list of `packages`. - """ - versions = cls.get_versions(name, packages) - return versions[:-1] - - @classmethod - def get_name_version(cls, name, version, packages): - """ - Return the PypiPackage with `name` and `version` from a list of `packages` - or None if it is not found. - If `version` is None, return the latest version found. - """ - if version is None: - return cls.get_latest_version(name, packages) - - nvs = [p for p in cls.get_versions(name, packages) if p.version == version] - - if not nvs: - return - - if len(nvs) == 1: - return nvs[0] - - raise Exception(f'More than one PypiPackage with {name}=={version}') - - def fetch_wheel( - self, - environment=None, - fetched_filenames=None, - dest_dir=THIRDPARTY_DIR, - ): - """ - Download a binary wheel of this package matching the ``environment`` - Enviromnent constraints into ``dest_dir`` directory. - - Return the wheel filename if it was fetched, None otherwise. - - If the provided ``environment`` is None then the current Python - interpreter environment is used implicitly. Do not refetch wheel if - their name is in a provided ``fetched_filenames`` set. 
- """ - fetched_wheel_filename = None - if fetched_filenames is not None: - fetched_filenames = fetched_filenames - else: - fetched_filenames = set() - - for wheel in self.get_supported_wheels(environment): - - if wheel.filename not in fetched_filenames: - fetch_and_save_path_or_url( - filename=wheel.filename, - path_or_url=wheel.path_or_url, - dest_dir=dest_dir, - as_text=False, - ) - fetched_filenames.add(wheel.filename) - fetched_wheel_filename = wheel.filename - - # TODO: what if there is more than one? - break - - return fetched_wheel_filename - - def fetch_sdist(self, dest_dir=THIRDPARTY_DIR): - """ - Download the source distribution into `dest_dir` directory. Return the - fetched filename if it was fetched, False otherwise. - """ - if self.sdist: - assert self.sdist.filename - if TRACE: print('Fetching source for package:', self.name, self.version) - fetch_and_save_path_or_url( - filename=self.sdist.filename, - dest_dir=dest_dir, - path_or_url=self.sdist.path_or_url, - as_text=False, - ) - if TRACE: print(' --> file:', self.sdist.filename) - return self.sdist.filename - else: - print(f'Missing sdist for: {self.name}=={self.version}') - return False - - def delete_files(self, dest_dir=THIRDPARTY_DIR): - """ - Delete all PypiPackage files from `dest_dir` including wheels, sdist and - their ABOUT files. Note that we do not delete licenses since they can be - shared by several packages: therefore this would be done elsewhere in a - function that is aware of all used licenses. - """ - for to_delete in self.wheels + [self.sdist]: - if not to_delete: - continue - tdfn = to_delete.filename - for deletable in [tdfn, f'{tdfn}.ABOUT', f'{tdfn}.NOTICE']: - target = os.path.join(dest_dir, deletable) - if os.path.exists(target): - print(f'Deleting outdated {target}') - fileutils.delete(target) - - @classmethod - def get_dists(cls, paths_or_urls): - """ - Return a list of Distribution given a list of - `paths_or_urls` to wheels or source distributions. 
- - Each Distribution receives two extra attributes: - - the path_or_url it was created from - - its filename + Each Distribution receives two extra attributes: + - the path_or_url it was created from + - its filename For example: >>> paths_or_urls =''' ... /home/foo/bitarray-0.8.1-cp36-cp36m-linux_x86_64.whl ... bitarray-0.8.1-cp36-cp36m-macosx_10_9_x86_64.macosx_10_10_x86_64.whl ... bitarray-0.8.1-cp36-cp36m-win_amd64.whl - ... httsp://example.com/bar/bitarray-0.8.1.tar.gz - ... bitarray-0.8.1.tar.gz.ABOUT bit.LICENSE'''.split() - >>> result = list(PypiPackage.get_dists(paths_or_urls)) + ... https://example.com/bar/bitarray-0.8.1.tar.gz + ... bitarray-0.8.1.tar.gz.ABOUT + ... bit.LICENSE'''.split() + >>> results = list(PypiPackage.dists_from_paths_or_urls(paths_or_urls)) >>> for r in results: - ... r.filename = '' - ... r.path_or_url = '' - >>> expected = [ - ... Wheel(name='bitarray', version='0.8.1', build='', - ... python_versions=['cp36'], abis=['cp36m'], - ... platforms=['linux_x86_64']), - ... Wheel(name='bitarray', version='0.8.1', build='', - ... python_versions=['cp36'], abis=['cp36m'], - ... platforms=['macosx_10_9_x86_64', 'macosx_10_10_x86_64']), - ... Wheel(name='bitarray', version='0.8.1', build='', - ... python_versions=['cp36'], abis=['cp36m'], - ... platforms=['win_amd64']), - ... Sdist(name='bitarray', version='0.8.1') - ... ] - >>> assert expected == result - """ + ... print(r.__class__.__name__, r.name, r.version) + ... if isinstance(r, Wheel): + ... 
print(" ", ", ".join(r.python_versions), ", ".join(r.platforms)) + Wheel bitarray 0.8.1 + cp36 linux_x86_64 + Wheel bitarray 0.8.1 + cp36 macosx_10_9_x86_64, macosx_10_10_x86_64 + Wheel bitarray 0.8.1 + cp36 win_amd64 + Sdist bitarray 0.8.1 + """ + dists = [] + if TRACE_ULTRA_DEEP: + print(" ###paths_or_urls:", paths_or_urls) installable = [f for f in paths_or_urls if f.endswith(EXTENSIONS_INSTALLABLE)] for path_or_url in installable: try: - yield Distribution.from_path_or_url(path_or_url) + dist = Distribution.from_path_or_url(path_or_url) + dists.append(dist) + if TRACE_DEEP: + print( + " ===> dists_from_paths_or_urls:", + dist, + "\n ", + "with URL:", + dist.download_url, + "\n ", + "from URL:", + path_or_url, + ) except InvalidDistributionFilename: - if TRACE: - print(f'Skipping invalid distribution from: {path_or_url}') + if TRACE_DEEP: + print(f" Skipping invalid distribution from: {path_or_url}") continue + return dists def get_distributions(self): """ @@ -1549,50 +1504,54 @@ class Environment: """ An Environment describes a target installation environment with its supported Python version, ABI, platform, implementation and related - attributes. We can use these to pass as `pip download` options and force - fetching only the subset of packages that match these Environment - constraints as opposed to the current running Python interpreter - constraints. + attributes. + + We can use these to pass as `pip download` options and force fetching only + the subset of packages that match these Environment constraints as opposed + to the current running Python interpreter constraints. 
""" python_version = attr.ib( type=str, - default='', - metadata=dict(help='Python version supported by this environment.'), + default="", + metadata=dict(help="Python version supported by this environment."), ) operating_system = attr.ib( type=str, - default='', - metadata=dict(help='operating system supported by this environment.'), + default="", + metadata=dict(help="operating system supported by this environment."), ) implementation = attr.ib( type=str, - default='cp', - metadata=dict(help='Python implementation supported by this environment.'), + default="cp", + metadata=dict(help="Python implementation supported by this environment."), + repr=False, ) abis = attr.ib( type=list, default=attr.Factory(list), - metadata=dict(help='List of ABI tags supported by this environment.'), + metadata=dict(help="List of ABI tags supported by this environment."), + repr=False, ) platforms = attr.ib( type=list, default=attr.Factory(list), - metadata=dict(help='List of platform tags supported by this environment.'), + metadata=dict(help="List of platform tags supported by this environment."), + repr=False, ) @classmethod def from_pyver_and_os(cls, python_version, operating_system): - if '.' in python_version: - python_version = ''.join(python_version.split('.')) + if "." in python_version: + python_version = "".join(python_version.split(".")) return cls( python_version=python_version, - implementation='cp', + implementation="cp", abis=ABIS_BY_PYTHON_VERSION[python_version], platforms=PLATFORMS_BY_OS[operating_system], operating_system=operating_system, @@ -1600,27 +1559,34 @@ def from_pyver_and_os(cls, python_version, operating_system): def get_pip_cli_options(self): """ - Return a list of pip command line options for this environment. + Return a list of pip download command line options for this environment. 
""" options = [ - '--python-version', self.python_version, - '--implementation', self.implementation, - '--abi', self.abi, + "--python-version", + self.python_version, + "--implementation", + self.implementation, ] + for abi in self.abis: + options.extend(["--abi", abi]) + for platform in self.platforms: - options.extend(['--platform', platform]) + options.extend(["--platform", platform]) + return options def tags(self): """ Return a set of all the PEP425 tags supported by this environment. """ - return set(utils_pip_compatibility_tags.get_supported( - version=self.python_version or None, - impl=self.implementation or None, - platforms=self.platforms or None, - abis=self.abis or None, - )) + return set( + utils_pip_compatibility_tags.get_supported( + version=self.python_version or None, + impl=self.implementation or None, + platforms=self.platforms or None, + abis=self.abis or None, + ) + ) ################################################################################ # @@ -1630,142 +1596,199 @@ def tags(self): @attr.attributes -class Repository: +class PypiSimpleRepository: """ - A PyPI or links Repository of Python packages: wheels, sdist, ABOUT, etc. + A PyPI repository of Python packages: wheels, sdist, etc. like the public + PyPI simple index. It is populated lazily based on requested packages names. 
""" - packages_by_normalized_name = attr.ib( - type=dict, - default=attr.Factory(lambda: defaultdict(list)), - metadata=dict(help= - 'Mapping of {package name: [package objects]} available in this repo'), + index_url = attr.ib( + type=str, + default=PYPI_SIMPLE_URL, + metadata=dict(help="Base PyPI simple URL for this index."), ) - packages_by_normalized_name_version = attr.ib( + # we keep a nested mapping of PypiPackage that has this shape: + # {name: {version: PypiPackage, version: PypiPackage, etc} + # the inner versions mapping is sorted by version from oldest to newest + + packages = attr.ib( type=dict, - default=attr.Factory(dict), - metadata=dict(help= - 'Mapping of {(name, version): package object} available in this repo'), + default=attr.Factory(lambda: defaultdict(dict)), + metadata=dict( + help="Mapping of {name: {version: PypiPackage, version: PypiPackage, etc} available in this repo" + ), ) - def get_links(self, *args, **kwargs): - raise NotImplementedError() + fetched_package_normalized_names = attr.ib( + type=set, + default=attr.Factory(set), + metadata=dict(help="A set of already fetched package normalized names."), + ) - def get_versions(self, name): + use_cached_index = attr.ib( + type=bool, + default=False, + metadata=dict(help="If True, use any existing on-disk cached PyPI index files. Otherwise, fetch and cache."), + ) + + def _get_package_versions_map(self, name): """ - Return a list of all available PypiPackage version for this package name. - The list may be empty. + Return a mapping of all available PypiPackage version for this package name. + The mapping may be empty. 
It is ordered by version from oldest to newest """ - raise NotImplementedError() + assert name + normalized_name = NameVer.normalize_name(name) + versions = self.packages[normalized_name] + if not versions and normalized_name not in self.fetched_package_normalized_names: + self.fetched_package_normalized_names.add(normalized_name) + try: + links = self.fetch_links(normalized_name=normalized_name) + # note that this is sorted so the mapping is also sorted + versions = { + package.version: package + for package in PypiPackage.packages_from_many_paths_or_urls(paths_or_urls=links) + } + self.packages[normalized_name] = versions + except RemoteNotFetchedException as e: + if TRACE: + print(f"failed to fetch package name: {name} from: {self.index_url}:\n{e}") + + if not versions and TRACE: + print(f"WARNING: package {name} not found in repo: {self.index_url}") + + return versions - def get_package(self, name, version): + def get_package_versions(self, name): + """ + Return a mapping of all available PypiPackage version as {version: + package} for this package name. The mapping may be empty but not None. + It is sorted by version from oldest to newest. + """ + return dict(self._get_package_versions_map(name)) + + def get_package_version(self, name, version=None): """ Return the PypiPackage with name and version or None. + Return the latest PypiPackage version if version is None. """ - raise NotImplementedError() + if not version: + versions = list(self._get_package_versions_map(name).values()) + return versions and versions[-1] + else: + return self._get_package_versions_map(name).get(version) - def get_latest_version(self, name): + def fetch_links(self, normalized_name): """ - Return the latest PypiPackage version for this package name or None. + Return a list of download link URLs found in a PyPI simple index for package + name using the `index_url` of this repository.
""" - raise NotImplementedError() + package_url = f"{self.index_url}/{normalized_name}" + text = CACHE.get( + path_or_url=package_url, + as_text=True, + force=not self.use_cached_index, + ) + links = collect_urls(text) + # TODO: keep sha256 + links = [l.partition("#sha256=") for l in links] + links = [url for url, _, _sha256 in links] + return links + + +PYPI_PUBLIC_REPO = PypiSimpleRepository(index_url=PYPI_SIMPLE_URL) +PYPI_SELFHOSTED_REPO = PypiSimpleRepository(index_url=ABOUT_PYPI_SIMPLE_URL) +DEFAULT_PYPI_REPOS = PYPI_PUBLIC_REPO, PYPI_SELFHOSTED_REPO +DEFAULT_PYPI_REPOS_BY_URL = {r.index_url: r for r in DEFAULT_PYPI_REPOS} @attr.attributes -class LinksRepository(Repository): +class LinksRepository: """ - Represents a simple links repository which is either a local directory with - Python wheels and sdist or a remote URL to an HTML with links to these. - (e.g. suitable for use with pip --find-links). + Represents a simple links repository such as an HTTP directory listing or an + HTML page with links. """ - path_or_url = attr.ib( + + url = attr.ib( type=str, - default='', - metadata=dict(help='Package directory path or URL'), + default="", + metadata=dict(help="Links directory URL"), ) links = attr.ib( type=list, default=attr.Factory(list), - metadata=dict(help='List of links available in this repo'), + metadata=dict(help="List of links available in this repo"), + ) + + use_cached_index = attr.ib( + type=bool, + default=False, + metadata=dict(help="If True, use any existing on-disk cached index files.
Otherwise, fetch and cache."), ) def __attrs_post_init__(self): if not self.links: - self.links = get_paths_or_urls(links_url=self.path_or_url) - if not self.packages_by_normalized_name: - for p in PypiPackage.packages_from_many_paths_or_urls(paths_or_urls=self.links): - normalized_name = p.normalized_name - self.packages_by_normalized_name[normalized_name].append(p) - self.packages_by_normalized_name_version[(normalized_name, p.version)] = p - - def get_links(self, *args, **kwargs): - return self.links or [] + self.links = self.find_links() - def get_versions(self, name): - name = name and NameVer.normalize_name(name) - return self.packages_by_normalized_name.get(name, []) - - def get_latest_version(self, name): - return PypiPackage.get_latest_version(name, self.get_versions(name)) - - def get_package(self, name, version): - return PypiPackage.get_name_version(name, version, self.get_versions(name)) + def find_links(self, _CACHE=[]): + """ + Return a list of link URLs found in the HTML page at `self.url` + """ + if _CACHE: + return _CACHE + links_url = self.url + if TRACE_DEEP: + print(f"Finding links from: {links_url}") + plinks_url = urllib.parse.urlparse(links_url) + base_url = urllib.parse.SplitResult( + plinks_url.scheme, plinks_url.netloc, "", "", "" + ).geturl() -@attr.attributes -class PypiRepository(Repository): - """ - Represents the public PyPI simple index. 
- It is populated lazily based on requested packages names - """ - simple_url = attr.ib( - type=str, - default=PYPI_SIMPLE_URL, - metadata=dict(help='Base PyPI simple URL for this index.'), - ) + if TRACE_DEEP: + print(f"Base URL {base_url}") - links_by_normalized_name = attr.ib( - type=dict, - default=attr.Factory(lambda: defaultdict(list)), - metadata=dict(help='Mapping of {package name: [links]} available in this repo'), - ) + text = CACHE.get( + path_or_url=links_url, + as_text=True, + force=not self.use_cached_index, + ) - def _fetch_links(self, name): - name = name and NameVer.normalize_name(name) - return find_pypi_links(name=name, simple_url=self.simple_url) + links = [] + for link in collect_urls(text): + if not link.endswith(EXTENSIONS): + continue - def _populate_links_and_packages(self, name): - name = name and NameVer.normalize_name(name) - if name in self.links_by_normalized_name: - return + plink = urllib.parse.urlsplit(link) - links = self._fetch_links(name) - self.links_by_normalized_name[name] = links + if plink.scheme: + # full URL kept as-is + url = link - packages = list(PypiPackage.packages_from_many_paths_or_urls(paths_or_urls=links)) - self.packages_by_normalized_name[name] = packages + if plink.path.startswith("/"): + # absolute link + url = f"{base_url}{link}" - for p in packages: - name = name and NameVer.normalize_name(p.name) - self.packages_by_normalized_name_version[(name, p.version)] = p + else: + # relative link + url = f"{links_url}/{link}" - def get_links(self, name, *args, **kwargs): - name = name and NameVer.normalize_name(name) - self._populate_links_and_packages(name) - return self.links_by_normalized_name.get(name, []) + if TRACE_DEEP: + print(f"Adding URL: {url}") - def get_versions(self, name): - name = name and NameVer.normalize_name(name) - self._populate_links_and_packages(name) - return self.packages_by_normalized_name.get(name, []) + links.append(url) - def get_latest_version(self, name): - return 
PypiPackage.get_latest_version(name, self.get_versions(name)) + if TRACE: + print(f"Found {len(links)} links at {links_url}") + _CACHE.extend(links) + return links - def get_package(self, name, version): - return PypiPackage.get_name_version(name, version, self.get_versions(name)) + @classmethod + def from_url(cls, url=ABOUT_BASE_URL, _LINKS_REPO={}, use_cached_index=False): + if url not in _LINKS_REPO: + _LINKS_REPO[url] = cls(url=url, use_cached_index=use_cached_index) + return _LINKS_REPO[url] ################################################################################ # Globals for remote repos to be lazily created and cached on first use for the @@ -1778,51 +1801,7 @@ def get_local_packages(directory=THIRDPARTY_DIR): Return the list of all PypiPackage objects built from a local directory. Return an empty list if the package cannot be found. """ - return list(PypiPackage.packages_from_one_path_or_url(path_or_url=directory)) - - -def get_local_repo(directory=THIRDPARTY_DIR): - return LinksRepository(path_or_url=directory) - - -_REMOTE_REPO = None - - -def get_remote_repo(remote_links_url=REMOTE_LINKS_URL): - global _REMOTE_REPO - if not _REMOTE_REPO: - _REMOTE_REPO = LinksRepository(path_or_url=remote_links_url) - return _REMOTE_REPO - - -def get_remote_package(name, version, remote_links_url=REMOTE_LINKS_URL): - """ - Return a PypiPackage or None. - """ - try: - return get_remote_repo(remote_links_url).get_package(name, version) - except RemoteNotFetchedException as e: - print(f'Failed to fetch remote package info: {e}') - - -_PYPI_REPO = None - - -def get_pypi_repo(pypi_simple_url=PYPI_SIMPLE_URL): - global _PYPI_REPO - if not _PYPI_REPO: - _PYPI_REPO = PypiRepository(simple_url=pypi_simple_url) - return _PYPI_REPO - - -def get_pypi_package(name, version, pypi_simple_url=PYPI_SIMPLE_URL): - """ - Return a PypiPackage or None. 
- """ - try: - return get_pypi_repo(pypi_simple_url).get_package(name, version) - except RemoteNotFetchedException as e: - print(f'Failed to fetch remote package info: {e}') + return list(PypiPackage.packages_from_dir(directory=directory)) ################################################################################ # @@ -1843,34 +1822,31 @@ class Cache: def __attrs_post_init__(self): os.makedirs(self.directory, exist_ok=True) - def clear(self): - shutil.rmtree(self.directory) - - def get(self, path_or_url, as_text=True): + def get(self, path_or_url, as_text=True, force=False): """ - Get a file from a `path_or_url` through the cache. - `path_or_url` can be a path or a URL to a file. + Return the content fetched from a ``path_or_url`` through the cache. + Raise an Exception on errors. Treats the content as text if as_text is + True otherwise as treat as binary. `path_or_url` can be a path or a URL + to a file. """ - filename = os.path.basename(path_or_url.strip('/')) - cached = os.path.join(self.directory, filename) + cache_key = quote_plus(path_or_url.strip("/")) + cached = os.path.join(self.directory, cache_key) - if not os.path.exists(cached): + if force or not os.path.exists(cached): + if TRACE_DEEP: + print(f" FILE CACHE MISS: {path_or_url}") content = get_file_content(path_or_url=path_or_url, as_text=as_text) - wmode = 'w' if as_text else 'wb' + wmode = "w" if as_text else "wb" with open(cached, wmode) as fo: fo.write(content) return content else: + if TRACE_DEEP: + print(f" FILE CACHE HIT: {path_or_url}") return get_local_file_content(path=cached, as_text=as_text) - def put(self, filename, content): - """ - Put in the cache the `content` of `filename`. 
- """ - cached = os.path.join(self.directory, filename) - wmode = 'wb' if isinstance(content, bytes) else 'w' - with open(cached, wmode) as fo: - fo.write(content) + +CACHE = Cache() def get_file_content(path_or_url, as_text=True): @@ -1878,18 +1854,19 @@ def get_file_content(path_or_url, as_text=True): Fetch and return the content at `path_or_url` from either a local path or a remote URL. Return the content as bytes is `as_text` is False. """ - if (path_or_url.startswith('file://') - or (path_or_url.startswith('/') and os.path.exists(path_or_url)) - ): - return get_local_file_content(path=path_or_url, as_text=as_text) - - elif path_or_url.startswith('https://'): - if TRACE: print(f'Fetching: {path_or_url}') + if path_or_url.startswith("https://"): + if TRACE_DEEP: + print(f"Fetching: {path_or_url}") _headers, content = get_remote_file_content(url=path_or_url, as_text=as_text) return content + elif path_or_url.startswith("file://") or ( + path_or_url.startswith("/") and os.path.exists(path_or_url) + ): + return get_local_file_content(path=path_or_url, as_text=as_text) + else: - raise Exception(f'Unsupported URL scheme: {path_or_url}') + raise Exception(f"Unsupported URL scheme: {path_or_url}") def get_local_file_content(path, as_text=True): @@ -1897,10 +1874,10 @@ def get_local_file_content(path, as_text=True): Return the content at `url` as text. Return the content as bytes is `as_text` is False. """ - if path.startswith('file://'): + if path.startswith("file://"): path = path[7:] - mode = 'r' if as_text else 'rb' + mode = "r" if as_text else "rb" with open(path, mode) as fo: return fo.read() @@ -1909,7 +1886,13 @@ class RemoteNotFetchedException(Exception): pass -def get_remote_file_content(url, as_text=True, headers_only=False, headers=None, _delay=0,): +def get_remote_file_content( + url, + as_text=True, + headers_only=False, + headers=None, + _delay=0, +): """ Fetch and return a tuple of (headers, content) at `url`. 
Return content as a text string if `as_text` is True. Otherwise return the content as bytes. @@ -1924,6 +1907,7 @@ def get_remote_file_content(url, as_text=True, headers_only=False, headers=None, # using a GET with stream=True ensure we get the the final header from # several redirects and that we can ignore content there. A HEAD request may # not get us this last header + print(f" DOWNLOADING: {url}") with requests.get(url, allow_redirects=True, stream=True, headers=headers) as response: status = response.status_code if status != requests.codes.ok: # NOQA @@ -1939,7 +1923,7 @@ def get_remote_file_content(url, as_text=True, headers_only=False, headers=None, ) else: - raise RemoteNotFetchedException(f'Failed HTTP request from {url} with {status}') + raise RemoteNotFetchedException(f"Failed HTTP request from {url} with {status}") if headers_only: return response.headers, None @@ -1947,465 +1931,53 @@ def get_remote_file_content(url, as_text=True, headers_only=False, headers=None, return response.headers, response.text if as_text else response.content -def get_url_content_if_modified(url, md5, _delay=0,): - """ - Return fetched content bytes at `url` or None if the md5 has not changed. - Retries multiple times to fetch if there is a HTTP 429 throttling response - and this with an increasing delay. - """ - time.sleep(_delay) - headers = None - if md5: - etag = f'"{md5}"' - headers = {'If-None-Match': f'{etag}'} - - # using a GET with stream=True ensure we get the the final header from - # several redirects and that we can ignore content there. 
A HEAD request may - # not get us this last header - with requests.get(url, allow_redirects=True, stream=True, headers=headers) as response: - status = response.status_code - if status == requests.codes.too_many_requests and _delay < 20: # NOQA - # too many requests: start waiting with some exponential delay - _delay = (_delay * 2) or 1 - return get_url_content_if_modified(url=url, md5=md5, _delay=_delay) - - elif status == requests.codes.not_modified: # NOQA - # all is well, the md5 is the same - return None - - elif status != requests.codes.ok: # NOQA - raise RemoteNotFetchedException(f'Failed HTTP request from {url} with {status}') - - return response.content - - -def get_remote_headers(url): - """ - Fetch and return a mapping of HTTP headers of `url`. - """ - headers, _content = get_remote_file_content(url, headers_only=True) - return headers - - -def fetch_and_save_filename_from_paths_or_urls( +def fetch_and_save( + path_or_url, + dest_dir, filename, - paths_or_urls, - dest_dir=THIRDPARTY_DIR, as_text=True, ): """ - Return the content from fetching the `filename` file name found in the - `paths_or_urls` list of URLs or paths and save to `dest_dir`. Raise an - Exception on errors. Treats the content as text if `as_text` is True - otherwise as binary. + Fetch content at ``path_or_url`` URL or path and save this to + ``dest_dir/filername``. Return the fetched content. Raise an Exception on + errors. Treats the content as text if as_text is True otherwise as treat as + binary. """ - path_or_url = get_link_for_filename( - filename=filename, - paths_or_urls=paths_or_urls, - ) - - return fetch_and_save_path_or_url( - filename=filename, - dest_dir=dest_dir, + content = CACHE.get( path_or_url=path_or_url, as_text=as_text, ) - - -def fetch_content_from_path_or_url_through_cache(path_or_url, as_text=True, cache=Cache()): - """ - Return the content from fetching at path or URL. Raise an Exception on - errors. 
Treats the content as text if as_text is True otherwise as treat as - binary. Use the provided file cache. This is the main entry for using the - cache. - - Note: the `cache` argument is a global, though it does not really matter - since it does not hold any state which is only kept on disk. - """ - if cache: - return cache.get(path_or_url=path_or_url, as_text=as_text) - else: - return get_file_content(path_or_url=path_or_url, as_text=as_text) - - -def fetch_and_save_path_or_url(filename, dest_dir, path_or_url, as_text=True, through_cache=True): - """ - Return the content from fetching the `filename` file name at URL or path - and save to `dest_dir`. Raise an Exception on errors. Treats the content as - text if as_text is True otherwise as treat as binary. - """ - if through_cache: - content = fetch_content_from_path_or_url_through_cache(path_or_url, as_text) - else: - content = fetch_content_from_path_or_url_through_cache(path_or_url, as_text, cache=None) - output = os.path.join(dest_dir, filename) - wmode = 'w' if as_text else 'wb' + wmode = "w" if as_text else "wb" with open(output, wmode) as fo: fo.write(content) return content ################################################################################ # -# Sync and fix local thirdparty directory for various issues and gaps +# Functions to update or fetch ABOUT and license files # ################################################################################ -def fetch_missing_sources(dest_dir=THIRDPARTY_DIR): - """ - Given a thirdparty dir, fetch missing source distributions from our remote - repo or PyPI. 
Return a list of (name, version) tuples for source - distribution that were not found - """ - not_found = [] - local_packages = get_local_packages(directory=dest_dir) - remote_repo = get_remote_repo() - pypi_repo = get_pypi_repo() - - for package in local_packages: - if not package.sdist: - print(f'Finding sources for: {package.name}=={package.version}: ', end='') - try: - pypi_package = pypi_repo.get_package( - name=package.name, version=package.version) - - if pypi_package and pypi_package.sdist: - print(f'Fetching sources from Pypi') - pypi_package.fetch_sdist(dest_dir=dest_dir) - continue - else: - remote_package = remote_repo.get_package( - name=package.name, version=package.version) - - if remote_package and remote_package.sdist: - print(f'Fetching sources from Remote') - remote_package.fetch_sdist(dest_dir=dest_dir) - continue - - except RemoteNotFetchedException as e: - print(f'Failed to fetch remote package info: {e}') - - print(f'No sources found') - not_found.append((package.name, package.version,)) - - return not_found - - -def fetch_missing_wheels( - python_versions=PYTHON_VERSIONS, - operating_systems=PLATFORMS_BY_OS, +def clean_about_files( dest_dir=THIRDPARTY_DIR, ): """ - Given a thirdparty dir fetch missing wheels for all known combos of Python - versions and OS. Return a list of tuple (Package, Environment) for wheels - that were not found locally or remotely. 
+ Given a thirdparty dir, clean ABOUT files """ local_packages = get_local_packages(directory=dest_dir) - evts = itertools.product(python_versions, operating_systems) - environments = [Environment.from_pyver_and_os(pyv, os) for pyv, os in evts] - packages_and_envts = itertools.product(local_packages, environments) - - not_fetched = [] - fetched_filenames = set() - for package, envt in packages_and_envts: - - filename = package.fetch_wheel( - environment=envt, - fetched_filenames=fetched_filenames, - dest_dir=dest_dir, - ) - - if filename: - fetched_filenames.add(filename) - else: - not_fetched.append((package, envt,)) - - return not_fetched - - -def build_missing_wheels( - packages_and_envts, - build_remotely=False, - with_deps=False, - dest_dir=THIRDPARTY_DIR, -): - """ - Build all wheels in a list of tuple (Package, Environment) and save in - `dest_dir`. Return a list of tuple (Package, Environment), and a list of - built wheel filenames. - """ - - not_built = [] - built_filenames = [] - - packages_and_envts = itertools.groupby( - sorted(packages_and_envts), key=operator.itemgetter(0)) - - for package, pkg_envts in packages_and_envts: - - envts = [envt for _pkg, envt in pkg_envts] - python_versions = sorted(set(e.python_version for e in envts)) - operating_systems = sorted(set(e.operating_system for e in envts)) - built = None - try: - built = build_wheels( - requirements_specifier=package.specifier, - with_deps=with_deps, - build_remotely=build_remotely, - python_versions=python_versions, - operating_systems=operating_systems, - verbose=False, - dest_dir=dest_dir, - ) - print('.') - except Exception as e: - import traceback - print('#############################################################') - print('############# WHEEL BUILD FAILED ######################') - traceback.print_exc() - print() - print('#############################################################') - - if not built: - for envt in pkg_envts: - not_built.append((package, envt)) - else: - for bfn 
in built: - print(f' --> Built wheel: {bfn}') - built_filenames.append(bfn) - - return not_built, built_filenames - -################################################################################ -# -# Functions to handle remote or local repo used to "find-links" -# -################################################################################ - - -def get_paths_or_urls(links_url): - if links_url.startswith('https:'): - paths_or_urls = find_links_from_release_url(links_url) - else: - paths_or_urls = find_links_from_dir(links_url) - return paths_or_urls - - -def find_links_from_dir(directory=THIRDPARTY_DIR): - """ - Return a list of path to files in `directory` for any file that ends with - any of the extension in the list of `extensions` strings. - """ - base = os.path.abspath(directory) - files = [os.path.join(base, f) for f in os.listdir(base) if f.endswith(EXTENSIONS)] - return files - - -get_links = re.compile('href="([^"]+)"').findall - - -def find_links_from_release_url(links_url=REMOTE_LINKS_URL): - """ - Return a list of download link URLs found in the HTML page at `links_url` - URL that starts with the `prefix` string and ends with any of the extension - in the list of `extensions` strings. Use the `base_url` to prefix the links. 
- """ - if TRACE: print(f'Finding links for {links_url}') - - plinks_url = urllib.parse.urlparse(links_url) - - base_url = urllib.parse.SplitResult( - plinks_url.scheme, plinks_url.netloc, '', '', '').geturl() - - if TRACE: print(f'Base URL {base_url}') - - _headers, text = get_remote_file_content(links_url) - links = [] - for link in get_links(text): - if not link.endswith(EXTENSIONS): - continue - - plink = urllib.parse.urlsplit(link) - - if plink.scheme: - # full URL kept as-is - url = link - - if plink.path.startswith('/'): - # absolute link - url = f'{base_url}{link}' - - else: - # relative link - url = f'{links_url}/{link}' - - if TRACE: print(f'Adding URL: {url}') - - links.append(url) - - if TRACE: print(f'Found {len(links)} links at {links_url}') - return links - - -def find_pypi_links(name, simple_url=PYPI_SIMPLE_URL): - """ - Return a list of download link URLs found in a PyPI simple index for package name. - with the list of `extensions` strings. Use the `simple_url` PyPI url. - """ - if TRACE: print(f'Finding links for {simple_url}') - - name = name and NameVer.normalize_name(name) - simple_url = simple_url.strip('/') - simple_url = f'{simple_url}/{name}' - - _headers, text = get_remote_file_content(simple_url) - links = get_links(text) - # TODO: keep sha256 - links = [l.partition('#sha256=') for l in links] - links = [url for url, _, _sha256 in links] - links = [l for l in links if l.endswith(EXTENSIONS)] - return links - - -def get_link_for_filename(filename, paths_or_urls): - """ - Return a link for `filename` found in the `links` list of URLs or paths. Raise an - exception if no link is found or if there are more than one link for that - file name. 
- """ - path_or_url = [l for l in paths_or_urls if l.endswith(f'/{filename}')] - if not path_or_url: - raise Exception(f'Missing link to file: {filename}') - if not len(path_or_url) == 1: - raise Exception(f'Multiple links to file: {filename}: \n' + '\n'.join(path_or_url)) - return path_or_url[0] - -################################################################################ -# -# Requirements processing -# -################################################################################ - - -class MissingRequirementException(Exception): - pass - - -def get_required_packages(required_name_versions): - """ - Return a tuple of (remote packages, PyPI packages) where each is a mapping - of {(name, version): PypiPackage} for packages listed in the - `required_name_versions` list of (name, version) tuples. Raise a - MissingRequirementException with a list of missing (name, version) if a - requirement cannot be satisfied remotely or in PyPI. - """ - remote_repo = get_remote_repo() - - remote_packages = {(name, version): remote_repo.get_package(name, version) - for name, version in required_name_versions} - - pypi_repo = get_pypi_repo() - pypi_packages = {(name, version): pypi_repo.get_package(name, version) - for name, version in required_name_versions} - - # remove any empty package (e.g. 
that do not exist in some place) - remote_packages = {nv: p for nv, p in remote_packages.items() if p} - pypi_packages = {nv: p for nv, p in pypi_packages.items() if p} - - # check that we are not missing any - repos_name_versions = set(remote_packages.keys()) | set(pypi_packages.keys()) - missing_name_versions = required_name_versions.difference(repos_name_versions) - if missing_name_versions: - raise MissingRequirementException(sorted(missing_name_versions)) - - return remote_packages, pypi_packages - - -def get_required_remote_packages( - requirements_file='requirements.txt', - force_pinned=True, - remote_links_url=REMOTE_LINKS_URL, -): - """ - Yield tuple of (name, version, PypiPackage) for packages listed in the - `requirements_file` requirements file and found in the PyPI-like link repo - ``remote_links_url`` if this is a URL. Treat this ``remote_links_url`` as a - local directory path to a wheels directory if this is not a a URL. - """ - required_name_versions = load_requirements( - requirements_file=requirements_file, - force_pinned=force_pinned, - ) - - if remote_links_url.startswith('https://'): - repo = get_remote_repo(remote_links_url=remote_links_url) - else: - # a local path - assert os.path.exists(remote_links_url), f'Path does not exist: {remote_links_url}' - repo = get_local_repo(directory=remote_links_url) - - for name, version in required_name_versions: - if version: - yield name, version, repo.get_package(name, version) - else: - yield name, version, repo.get_latest_version(name) - - -def update_requirements(name, version=None, requirements_file='requirements.txt'): - """ - Upgrade or add `package_name` with `new_version` to the `requirements_file` - requirements file. Write back requirements sorted with name and version - canonicalized. Note: this cannot deal with hashed or unpinned requirements. - Do nothing if the version already exists as pinned. 
- """ - normalized_name = NameVer.normalize_name(name) - - is_updated = False - updated_name_versions = [] - for existing_name, existing_version in load_requirements(requirements_file, force_pinned=False): - - existing_normalized_name = NameVer.normalize_name(existing_name) - - if normalized_name == existing_normalized_name: - if version != existing_version: - is_updated = True - updated_name_versions.append((existing_normalized_name, existing_version,)) - - if is_updated: - updated_name_versions = sorted(updated_name_versions) - nvs = '\n'.join(f'{name}=={version}' for name, version in updated_name_versions) - - with open(requirements_file, 'w') as fo: - fo.write(nvs) - - -def hash_requirements(dest_dir=THIRDPARTY_DIR, requirements_file='requirements.txt'): - """ - Hash all the requirements found in the `requirements_file` - requirements file based on distributions available in `dest_dir` - """ - local_repo = get_local_repo(directory=dest_dir) - packages_by_normalized_name_version = local_repo.packages_by_normalized_name_version - hashed = [] - for name, version in load_requirements(requirements_file, force_pinned=True): - package = packages_by_normalized_name_version.get((name, version)) - if not package: - raise Exception(f'Missing required package {name}=={version}') - hashed.append(package.specifier_with_hashes) - - with open(requirements_file, 'w') as fo: - fo.write('\n'.join(hashed)) + for local_package in local_packages: + for local_dist in local_package.get_distributions(): + local_dist.load_about_data(dest_dir=dest_dir) + local_dist.set_checksums(dest_dir=dest_dir) -################################################################################ -# -# Functions to update or fetch ABOUT and license files -# -################################################################################ + if "classifiers" in local_dist.extra_data: + local_dist.extra_data.pop("classifiers", None) + local_dist.save_about_and_notice_files(dest_dir) -def 
add_fetch_or_update_about_and_license_files(dest_dir=THIRDPARTY_DIR, include_remote=True): +def fetch_abouts_and_licenses(dest_dir=THIRDPARTY_DIR, use_cached_index=False): """ Given a thirdparty dir, add missing ABOUT. LICENSE and NOTICE files using best efforts: @@ -2415,23 +1987,24 @@ def add_fetch_or_update_about_and_license_files(dest_dir=THIRDPARTY_DIR, include - derive from existing distribution with same name and latest version that would have such ABOUT file - extract ABOUT file data from distributions PKGINFO or METADATA files - - TODO: make API calls to fetch package data from DejaCode - The process consists in load and iterate on every package distributions, - collect data and then acsk to save. + Use available existing on-disk cached index if use_cached_index is True. """ - local_packages = get_local_packages(directory=dest_dir) - local_repo = get_local_repo(directory=dest_dir) - - remote_repo = get_remote_repo() - def get_other_dists(_package, _dist): """ - Return a list of all the dists from package that are not the `dist` object + Return a list of all the dists from `_package` that are not the `_dist` + object """ return [d for d in _package.get_distributions() if d != _dist] + local_packages = get_local_packages(directory=dest_dir) + packages_by_name = defaultdict(list) + for local_package in local_packages: + distributions = list(local_package.get_distributions()) + distribution = distributions[0] + packages_by_name[distribution.name].append(local_package) + for local_package in local_packages: for local_dist in local_package.get_distributions(): local_dist.load_about_data(dest_dir=dest_dir) @@ -2440,7 +2013,7 @@ def get_other_dists(_package, _dist): # if has key data we may look to improve later, but we can move on if local_dist.has_key_metadata(): local_dist.save_about_and_notice_files(dest_dir=dest_dir) - local_dist.fetch_license_files(dest_dir=dest_dir) + local_dist.fetch_license_files(dest_dir=dest_dir, use_cached_index=use_cached_index) 
continue # lets try to get from another dist of the same local package @@ -2452,18 +2025,18 @@ def get_other_dists(_package, _dist): # if has key data we may look to improve later, but we can move on if local_dist.has_key_metadata(): local_dist.save_about_and_notice_files(dest_dir=dest_dir) - local_dist.fetch_license_files(dest_dir=dest_dir) + local_dist.fetch_license_files(dest_dir=dest_dir, use_cached_index=use_cached_index) continue - # try to get a latest version of the same package that is not our version + # try to get another version of the same package that is not our version other_local_packages = [ - p for p in local_repo.get_versions(local_package.name) + p + for p in packages_by_name[local_package.name] if p.version != local_package.version ] - - latest_local_version = other_local_packages and other_local_packages[-1] - if latest_local_version: - latest_local_dists = list(latest_local_version.get_distributions()) + other_local_version = other_local_packages and other_local_packages[-1] + if other_local_version: + latest_local_dists = list(other_local_version.get_distributions()) for latest_local_dist in latest_local_dists: latest_local_dist.load_about_data(dest_dir=dest_dir) if not latest_local_dist.has_key_metadata(): @@ -2478,45 +2051,47 @@ def get_other_dists(_package, _dist): # if has key data we may look to improve later, but we can move on if local_dist.has_key_metadata(): local_dist.save_about_and_notice_files(dest_dir=dest_dir) - local_dist.fetch_license_files(dest_dir=dest_dir) + local_dist.fetch_license_files(dest_dir=dest_dir, use_cached_index=use_cached_index) continue - if include_remote: - # lets try to fetch remotely - local_dist.load_remote_about_data() + # lets try to fetch remotely + local_dist.load_remote_about_data() + + # if has key data we may look to improve later, but we can move on + if local_dist.has_key_metadata(): + local_dist.save_about_and_notice_files(dest_dir=dest_dir) + local_dist.fetch_license_files(dest_dir=dest_dir, 
use_cached_index=use_cached_index) + continue + + # try to get a latest version of the same package that is not our version + # and that is in our self hosted repo + lpv = local_package.version + lpn = local_package.name + + other_remote_packages = [ + p for v, p in PYPI_SELFHOSTED_REPO.get_package_versions(lpn).items() if v != lpv + ] + + latest_version = other_remote_packages and other_remote_packages[-1] + if latest_version: + latest_dists = list(latest_version.get_distributions()) + for remote_dist in latest_dists: + remote_dist.load_remote_about_data() + if not remote_dist.has_key_metadata(): + # there is not much value to get other data if we are missing the key ones + continue + else: + local_dist.update_from_other_dist(remote_dist) + # if has key data we may look to improve later, but we can move on + if local_dist.has_key_metadata(): + break # if has key data we may look to improve later, but we can move on if local_dist.has_key_metadata(): local_dist.save_about_and_notice_files(dest_dir=dest_dir) - local_dist.fetch_license_files(dest_dir=dest_dir) + local_dist.fetch_license_files(dest_dir=dest_dir, use_cached_index=use_cached_index) continue - # try to get a latest version of the same package that is not our version - other_remote_packages = [ - p for p in remote_repo.get_versions(local_package.name) - if p.version != local_package.version - ] - - latest_version = other_remote_packages and other_remote_packages[-1] - if latest_version: - latest_dists = list(latest_version.get_distributions()) - for remote_dist in latest_dists: - remote_dist.load_remote_about_data() - if not remote_dist.has_key_metadata(): - # there is not much value to get other data if we are missing the key ones - continue - else: - local_dist.update_from_other_dist(remote_dist) - # if has key data we may look to improve later, but we can move on - if local_dist.has_key_metadata(): - break - - # if has key data we may look to improve later, but we can move on - if 
local_dist.has_key_metadata(): - local_dist.save_about_and_notice_files(dest_dir=dest_dir) - local_dist.fetch_license_files(dest_dir=dest_dir) - continue - # try to get data from pkginfo (no license though) local_dist.load_pkginfo_data(dest_dir=dest_dir) @@ -2524,15 +2099,13 @@ def get_other_dists(_package, _dist): # if local_dist.has_key_metadata() or not local_dist.has_key_metadata(): local_dist.save_about_and_notice_files(dest_dir) - lic_errs = local_dist.fetch_license_files(dest_dir) - - # TODO: try to get data from dejacode + lic_errs = local_dist.fetch_license_files(dest_dir, use_cached_index=use_cached_index) if not local_dist.has_key_metadata(): - print(f'Unable to add essential ABOUT data for: {local_dist}') + print(f"Unable to add essential ABOUT data for: {local_dist}") if lic_errs: - lic_errs = '\n'.join(lic_errs) - print(f'Failed to fetch some licenses:: {lic_errs}') + lic_errs = "\n".join(lic_errs) + print(f"Failed to fetch some licenses:: {lic_errs}") ################################################################################ # @@ -2541,397 +2114,116 @@ def get_other_dists(_package, _dist): ################################################################################ -def call(args): +def call(args, verbose=TRACE): """ - Call args in a subprocess and display output on the fly. - Return or raise stdout, stderr, returncode + Call args in a subprocess and display output on the fly if ``trace`` is True. 
+ Return a tuple of (returncode, stdout, stderr) """ - if TRACE: print('Calling:', ' '.join(args)) + if TRACE_DEEP: + print("Calling:", " ".join(args)) with subprocess.Popen( - args, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - encoding='utf-8' + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8" ) as process: + stdouts = [] while True: line = process.stdout.readline() if not line and process.poll() is not None: break - if TRACE: print(line.rstrip(), flush=True) + stdouts.append(line) + if verbose: + print(line.rstrip(), flush=True) stdout, stderr = process.communicate() - returncode = process.returncode - if returncode == 0: - return returncode, stdout, stderr - else: - raise Exception(returncode, stdout, stderr) - - -def add_or_upgrade_built_wheels( - name, - version=None, - python_versions=PYTHON_VERSIONS, - operating_systems=PLATFORMS_BY_OS, - dest_dir=THIRDPARTY_DIR, - build_remotely=False, - with_deps=False, - verbose=False, -): - """ - Add or update package `name` and `version` as a binary wheel saved in - `dest_dir`. Use the latest version if `version` is None. Return the a list - of the collected, fetched or built wheel file names or an empty list. - - Use the provided lists of `python_versions` (e.g. "36", "39") and - `operating_systems` (e.g. linux, windows or macos) to decide which specific - wheel to fetch or build. - - Include wheels for all dependencies if `with_deps` is True. - Build remotely is `build_remotely` is True. 
- """ - assert name, 'Name is required' - ver = version and f'=={version}' or '' - print(f'\nAdding wheels for package: {name}{ver}') - - wheel_filenames = [] - # a mapping of {req specifier: {mapping build_wheels kwargs}} - wheels_to_build = {} - for python_version, operating_system in itertools.product(python_versions, operating_systems): - print(f' Adding wheels for package: {name}{ver} on {python_version,} and {operating_system}') - environment = Environment.from_pyver_and_os(python_version, operating_system) - - # Check if requested wheel already exists locally for this version - local_repo = get_local_repo(directory=dest_dir) - local_package = local_repo.get_package(name=name, version=version) - - has_local_wheel = False - if version and local_package: - for wheel in local_package.get_supported_wheels(environment): - has_local_wheel = True - wheel_filenames.append(wheel.filename) - break - if has_local_wheel: - print(f' local wheel exists: {wheel.filename}') - continue - - if not version: - pypi_package = get_pypi_repo().get_latest_version(name) - version = pypi_package.version - - # Check if requested wheel already exists remotely or in Pypi for this version - wheel_filename = fetch_package_wheel( - name=name, version=version, environment=environment, dest_dir=dest_dir) - if wheel_filename: - wheel_filenames.append(wheel_filename) - - # the wheel is not available locally, remotely or in Pypi - # we need to build binary from sources - requirements_specifier = f'{name}=={version}' - to_build = wheels_to_build.get(requirements_specifier) - if to_build: - to_build['python_versions'].append(python_version) - to_build['operating_systems'].append(operating_system) - else: - wheels_to_build[requirements_specifier] = dict( - requirements_specifier=requirements_specifier, - python_versions=[python_version], - operating_systems=[operating_system], - dest_dir=dest_dir, - build_remotely=build_remotely, - with_deps=with_deps, - verbose=verbose, - ) - - for 
build_wheels_kwargs in wheels_to_build.values(): - bwheel_filenames = build_wheels(**build_wheels_kwargs) - wheel_filenames.extend(bwheel_filenames) - - return sorted(set(wheel_filenames)) - - -def build_wheels( - requirements_specifier, - python_versions=PYTHON_VERSIONS, - operating_systems=PLATFORMS_BY_OS, - dest_dir=THIRDPARTY_DIR, - build_remotely=False, - with_deps=False, - verbose=False, -): - """ - Given a pip `requirements_specifier` string (such as package names or as - name==version), build the corresponding binary wheel(s) for all - `python_versions` and `operating_systems` combinations and save them - back in `dest_dir` and return a list of built wheel file names. - - Include wheels for all dependencies if `with_deps` is True. - - First try to build locally to process pure Python wheels, and fall back to - build remotey on all requested Pythons and operating systems. - """ - all_pure, builds = build_wheels_locally_if_pure_python( - requirements_specifier=requirements_specifier, - with_deps=with_deps, - verbose=verbose, - dest_dir=dest_dir, - ) - for local_build in builds: - print(f'Built wheel: {local_build}') - - if all_pure: - return builds - - if build_remotely: - remote_builds = build_wheels_remotely_on_multiple_platforms( - requirements_specifier=requirements_specifier, - with_deps=with_deps, - python_versions=python_versions, - operating_systems=operating_systems, - verbose=verbose, - dest_dir=dest_dir, - ) - builds.extend(remote_builds) + if not stdout.strip(): + stdout = "\n".join(stdouts) + return process.returncode, stdout, stderr - return builds - -def build_wheels_remotely_on_multiple_platforms( - requirements_specifier, - with_deps=False, - python_versions=PYTHON_VERSIONS, - operating_systems=PLATFORMS_BY_OS, - verbose=False, - dest_dir=THIRDPARTY_DIR, -): - """ - Given pip `requirements_specifier` string (such as package names or as - name==version), build the corresponding binary wheel(s) including wheels for - all dependencies for all 
`python_versions` and `operating_systems` - combinations and save them back in `dest_dir` and return a list of built - wheel file names. - """ - check_romp_is_configured() - pyos_options = get_romp_pyos_options(python_versions, operating_systems) - deps = '' if with_deps else '--no-deps' - verbose = '--verbose' if verbose else '' - - romp_args = ([ - 'romp', - '--interpreter', 'cpython', - '--architecture', 'x86_64', - '--check-period', '5', # in seconds - - ] + pyos_options + [ - - '--artifact-paths', '*.whl', - '--artifact', 'artifacts.tar.gz', - '--command', - # create a virtualenv, upgrade pip -# f'python -m ensurepip --user --upgrade; ' - f'python -m pip {verbose} install --user --upgrade pip setuptools wheel; ' - f'python -m pip {verbose} wheel {deps} {requirements_specifier}', - ]) - - if verbose: - romp_args.append('--verbose') - - print(f'Building wheels for: {requirements_specifier}') - print(f'Using command:', ' '.join(romp_args)) - call(romp_args) - - wheel_filenames = extract_tar('artifacts.tar.gz', dest_dir) - for wfn in wheel_filenames: - print(f' built wheel: {wfn}') - return wheel_filenames - - -def get_romp_pyos_options( - python_versions=PYTHON_VERSIONS, - operating_systems=PLATFORMS_BY_OS, -): - """ - Return a list of CLI options for romp - For example: - >>> expected = ['--version', '3.6', '--version', '3.7', '--version', '3.8', - ... '--version', '3.9', '--platform', 'linux', '--platform', 'macos', - ... 
'--platform', 'windows'] - >>> assert get_romp_pyos_options() == expected - """ - python_dot_versions = ['.'.join(pv) for pv in sorted(set(python_versions))] - pyos_options = list(itertools.chain.from_iterable( - ('--version', ver) for ver in python_dot_versions)) - - pyos_options += list(itertools.chain.from_iterable( - ('--platform' , plat) for plat in sorted(set(operating_systems)))) - - return pyos_options - - -def check_romp_is_configured(): - # these environment variable must be set before - has_envt = ( - os.environ.get('ROMP_BUILD_REQUEST_URL') and - os.environ.get('ROMP_DEFINITION_ID') and - os.environ.get('ROMP_PERSONAL_ACCESS_TOKEN') and - os.environ.get('ROMP_USERNAME') - ) - - if not has_envt: - raise Exception( - 'ROMP_BUILD_REQUEST_URL, ROMP_DEFINITION_ID, ' - 'ROMP_PERSONAL_ACCESS_TOKEN and ROMP_USERNAME ' - 'are required enironment variables.') - - -def build_wheels_locally_if_pure_python( - requirements_specifier, - with_deps=False, - verbose=False, +def download_wheels_with_pip( + requirements_specifiers=tuple(), + requirements_files=tuple(), + environment=None, dest_dir=THIRDPARTY_DIR, + index_url=PYPI_SIMPLE_URL, + links_url=ABOUT_LINKS_URL, ): """ - Given pip `requirements_specifier` string (such as package names or as - name==version), build the corresponding binary wheel(s) locally. - - If all these are "pure" Python wheels that run on all Python 3 versions and - operating systems, copy them back in `dest_dir` if they do not exists there - - Return a tuple of (True if all wheels are "pure", list of built wheel file names) + Fetch binary wheel(s) using pip for the ``environment`` Environment given a list of + pip ``requirements_files`` and a list of ``requirements_specifiers`` string + (such as package names or as name==version). + Return a tuple of (list of downloaded files, error string). + Do NOT fail on errors, but return an error message on failure. 
""" - deps = [] if with_deps else ['--no-deps'] - verbose = ['--verbose'] if verbose else [] - wheel_dir = tempfile.mkdtemp(prefix='scancode-release-wheels-local-') cli_args = [ - 'pip', 'wheel', - '--wheel-dir', wheel_dir, - ] + deps + verbose + [ - requirements_specifier - ] - - print(f'Building local wheels for: {requirements_specifier}') - print(f'Using command:', ' '.join(cli_args)) - call(cli_args) - - built = os.listdir(wheel_dir) - if not built: - return [] - - all_pure = all(is_pure_wheel(bwfn) for bwfn in built) - - if not all_pure: - print(f' Some wheels are not pure') - - print(f' Copying local wheels') - pure_built = [] - for bwfn in built: - owfn = os.path.join(dest_dir, bwfn) - if not os.path.exists(owfn): - nwfn = os.path.join(wheel_dir, bwfn) - fileutils.copyfile(nwfn, owfn) - pure_built.append(bwfn) - print(f' Built local wheel: {bwfn}') - return all_pure, pure_built - - -# TODO: Use me -def optimize_wheel(wheel_filename, dest_dir=THIRDPARTY_DIR): - """ - Optimize a wheel named `wheel_filename` in `dest_dir` such as renaming its - tags for PyPI compatibility and making it smaller if possible. Return the - name of the new wheel if renamed or the existing new name otherwise. 
- """ - if is_pure_wheel(wheel_filename): - print(f'Pure wheel: {wheel_filename}, nothing to do.') - return wheel_filename - - original_wheel_loc = os.path.join(dest_dir, wheel_filename) - wheel_dir = tempfile.mkdtemp(prefix='scancode-release-wheels-') - awargs = [ - 'auditwheel', - 'addtag', - '--wheel-dir', wheel_dir, - original_wheel_loc + "pip", + "download", + "--only-binary", + ":all:", + "--dest", + dest_dir, + "--index-url", + index_url, + "--find-links", + links_url, + "--no-color", + "--progress-bar", + "off", + "--no-deps", + "--no-build-isolation", + "--verbose", + # "--verbose", ] - call(awargs) - - audited = os.listdir(wheel_dir) - if not audited: - # cannot optimize wheel - return wheel_filename - - assert len(audited) == 1 - new_wheel_name = audited[0] - - new_wheel_loc = os.path.join(wheel_dir, new_wheel_name) - - # this needs to go now - os.remove(original_wheel_loc) - - if new_wheel_name == wheel_filename: - os.rename(new_wheel_loc, original_wheel_loc) - return wheel_filename - - new_wheel = Wheel.from_filename(new_wheel_name) - non_pypi_plats = utils_pypi_supported_tags.validate_platforms_for_pypi(new_wheel.platforms) - new_wheel.platforms = [p for p in new_wheel.platforms if p not in non_pypi_plats] - if not new_wheel.platforms: - print(f'Cannot make wheel PyPI compatible: {original_wheel_loc}') - os.rename(new_wheel_loc, original_wheel_loc) - return wheel_filename - - new_wheel_cleaned_filename = new_wheel.to_filename() - new_wheel_cleaned_loc = os.path.join(dest_dir, new_wheel_cleaned_filename) - os.rename(new_wheel_loc, new_wheel_cleaned_loc) - return new_wheel_cleaned_filename + if environment: + eopts = environment.get_pip_cli_options() + cli_args.extend(eopts) + else: + print("WARNING: no download environment provided.") -def extract_tar(location, dest_dir=THIRDPARTY_DIR,): - """ - Extract a tar archive at `location` in the `dest_dir` directory. Return a - list of extracted locations (either directories or files). 
- """ - with open(location, 'rb') as fi: - with tarfile.open(fileobj=fi) as tar: - members = list(tar.getmembers()) - tar.extractall(dest_dir, members=members) + cli_args.extend(requirements_specifiers) + for req_file in requirements_files: + cli_args.extend(["--requirement", req_file]) - return [os.path.basename(ti.name) for ti in members - if ti.type == tarfile.REGTYPE] + if TRACE: + print(f"Downloading wheels using command:", " ".join(cli_args)) + existing = set(os.listdir(dest_dir)) + error = False + try: + returncode, _stdout, stderr = call(cli_args, verbose=True) + if returncode != 0: + error = stderr + except Exception as e: + error = str(e) -def fetch_package_wheel(name, version, environment, dest_dir=THIRDPARTY_DIR): - """ - Fetch the binary wheel for package `name` and `version` and save in - `dest_dir`. Use the provided `environment` Environment to determine which - specific wheel to fetch. + if error: + print() + print("###########################################################################") + print("##################### Failed to fetch all wheels ##########################") + print("###########################################################################") + print(error) + print() + print("###########################################################################") - Return the fetched wheel file name on success or None if it was not fetched. - Trying fetching from our own remote repo, then from PyPI. 
- """ - wheel_filename = None - remote_package = get_remote_package(name=name, version=version) - if remote_package: - wheel_filename = remote_package.fetch_wheel( - environment=environment, dest_dir=dest_dir) - if wheel_filename: - return wheel_filename + downloaded = existing ^ set(os.listdir(dest_dir)) + return sorted(downloaded), error - pypi_package = get_pypi_package(name=name, version=version) - if pypi_package: - wheel_filename = pypi_package.fetch_wheel( - environment=environment, dest_dir=dest_dir) - return wheel_filename +################################################################################ +# +# Functions to check for problems +# +################################################################################ def check_about(dest_dir=THIRDPARTY_DIR): try: - subprocess.check_output(f'venv/bin/about check {dest_dir}'.split()) + subprocess.check_output(f"venv/bin/about check {dest_dir}".split()) except subprocess.CalledProcessError as cpe: print() - print('Invalid ABOUT files:') - print(cpe.output.decode('utf-8', errors='replace')) + print("Invalid ABOUT files:") + print(cpe.output.decode("utf-8", errors="replace")) def find_problems( @@ -2947,32 +2239,36 @@ def find_problems( for package in local_packages: if report_missing_sources and not package.sdist: - print(f'{package.name}=={package.version}: Missing source distribution.') + print(f"{package.name}=={package.version}: Missing source distribution.") if report_missing_wheels and not package.wheels: - print(f'{package.name}=={package.version}: Missing wheels.') + print(f"{package.name}=={package.version}: Missing wheels.") for dist in package.get_distributions(): dist.load_about_data(dest_dir=dest_dir) abpth = os.path.abspath(os.path.join(dest_dir, dist.about_filename)) if not dist.has_key_metadata(): - print(f' Missing key ABOUT data in file://{abpth}') - if 'classifiers' in dist.extra_data: - print(f' Dangling classifiers data in file://{abpth}') + print(f" Missing key ABOUT data in 
file://{abpth}") + if "classifiers" in dist.extra_data: + print(f" Dangling classifiers data in file://{abpth}") if not dist.validate_checksums(dest_dir): - print(f' Invalid checksums in file://{abpth}') + print(f" Invalid checksums in file://{abpth}") if not dist.sha1 and dist.md5: - print(f' Missing checksums in file://{abpth}') + print(f" Missing checksums in file://{abpth}") check_about(dest_dir=dest_dir) def compute_normalized_license_expression(declared_licenses): + """ + Return a normalized license expression or None. + """ if not declared_licenses: return try: from packagedcode import pypi + return pypi.compute_normalized_license(declared_licenses) except ImportError: # Scancode is not installed, clean and join all the licenses lics = [python_safe_name(l).lower() for l in declared_licenses] - return ' AND '.join(lics).lower() + return " AND ".join(lics).lower() diff --git a/pyproject.toml b/pyproject.toml index 1e10f32..cde7907 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,9 @@ norecursedirs = [ "tmp", "venv", "tests/data", - ".eggs" + ".eggs", + "src/*/data", + "tests/*/data" ] python_files = "*.py" diff --git a/requirements-dev.txt b/requirements-dev.txt index 4dcff74..fe92ed8 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,9 +1,24 @@ +aboutcode-toolkit==7.0.1 +bleach==4.1.0 +build==0.7.0 +commonmark==0.9.1 +docutils==0.18.1 +et-xmlfile==1.1.0 execnet==1.9.0 iniconfig==1.1.1 -packaging==21.0 -py==1.10.0 -pyparsing==2.4.7 -pytest==6.2.5 -pytest-forked==1.3.0 -pytest-xdist==2.4.0 -toml==0.10.2 \ No newline at end of file +jeepney==0.7.1 +keyring==23.4.1 +openpyxl==3.0.9 +pep517==0.12.0 +pkginfo==1.8.2 +py==1.11.0 +pytest==7.0.1 +pytest-forked==1.4.0 +pytest-xdist==2.5.0 +readme-renderer==34.0 +requests-toolbelt==0.9.1 +rfc3986==1.5.0 +rich==12.3.0 +secretstorage==3.3.2 +tomli==1.2.3 +twine==3.8.0 diff --git a/requirements.txt b/requirements.txt index c5d3aad..627778d 100644 --- a/requirements.txt +++ b/requirements.txt @@ 
-1,36 +1,79 @@ -attrs==21.2.0 -beautifulsoup4==4.10.0 +attrs==21.4.0 +banal==1.0.6 +beautifulsoup4==4.11.1 binaryornot==0.4.4 -certifi==2021.5.30 -cffi==1.14.6 +boolean.py==3.8 +certifi==2021.10.8 +cffi==1.15.0 chardet==4.0.0 -charset-normalizer==2.0.6 -click==8.0.1 -commoncode==30.0.0 -cryptography==35.0.0 +charset-normalizer==2.0.12 +click==8.0.4 +colorama==0.4.4 +commoncode==30.2.0 +construct==2.10.68 +container-inspector==30.0.0 +cryptography==36.0.2 +debian-inspector==30.0.0 +dockerfile-parse==1.2.0 +dparse2==0.6.1 extractcode-7z==16.5.210531 extractcode-libarchive== -idna==3.2 -importlib-metadata==4.8.1 -intbitset==2.4.1 +fasteners==0.17.3 +fingerprints==1.0.3 +ftfy==6.0.3 +future==0.18.2 +gemfileparser==0.8.0 +html5lib==1.1 +idna==3.3 +importlib-metadata==4.8.3 +inflection==0.5.1 +intbitset==3.0.1 +isodate==0.6.1 +jaraco.functools==3.4.0 +javaproperties==0.8.1 +Jinja2==3.0.3 +jsonstreams==0.6.0 +license-expression==21.6.14 +lxml==4.8.0 +MarkupSafe==2.0.1 +more-itertools==8.13.0 +normality==2.3.3 +packagedcode-msitools==0.101.210706 +packageurl-python==0.9.9 +packaging==21.3 +parameter-expansion-patched==0.3.1 patch==1.16 -pdfminer.six==20201018 -pip==21.2.4 +pdfminer.six==20220506 +pefile==2021.9.3 +pip-requirements-parser==31.2.0 +pkginfo2==30.0.0 pluggy==1.0.0 plugincode==21.1.21 -pycparser==2.20 -PyYAML==5.4.1 -requests==2.26.0 +ply==3.11 +publicsuffix2==2.20191221 +pyahocorasick==2.0.0b1 +pycparser==2.21 +pygmars==0.7.0 +Pygments==2.12.0 +pymaven-patch==0.3.0 +pyparsing==3.0.8 +pytz==2022.1 +PyYAML==6.0 +rdflib==5.0.0 +regipy==2.2.2 +requests==2.27.1 +rpm-inspector-rpm== saneyaml==0.5.2 -setuptools==58.1.0 six==1.16.0 -sortedcontainers==2.4.0 -soupsieve==2.2.1 +soupsieve==2.3.1 +spdx-tools==0.7.0a3 text-unidecode==1.3 +toml==0.10.2 typecode==21.6.1 typecode-libmagic==5.39.210531 -typing==3.6.6 -typing-extensions== -urllib3==1.26.7 -wheel==0.37.0 +urllib3==1.26.9 +urlpy==0.5 +wcwidth==0.2.5 +webencodings==0.5.1 +xmltodict==0.12.0 zipp==3.6.0 diff --git 
a/setup.cfg b/setup.cfg index 074ef86..e4eeb8e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,25 +1,29 @@ [metadata] -license_files = - apache-2.0.LICENSE - NOTICE - AUTHORS.rst - CHANGELOG.rst name = extractcode -author = nexB. Inc. and others -author_email = info@aboutcode.org license = Apache-2.0 # description must be on ONE line https://github.com/pypa/setuptools/issues/1390 description = A mostly universal archive extractor using 7zip, libarchive and the Python standard library for reliable archive extraction. long_description = file:README.rst +long_description_content_type = text/x-rst url = https://github.com/nexB/extractcode + +author = nexB. Inc. and others +author_email = info@aboutcode.org + classifiers = Development Status :: 5 - Production/Stable Intended Audience :: Developers Programming Language :: Python :: 3 Programming Language :: Python :: 3 :: Only + Programming Language :: Python :: 3.6 + Programming Language :: Python :: 3.7 + Programming Language :: Python :: 3.8 + Programming Language :: Python :: 3.9 + Programming Language :: Python :: 3.10 Topic :: Software Development Topic :: Utilities + keywords = utilities archive @@ -49,23 +53,32 @@ keywords = patch scancode-toolkit +license_files = + apache-2.0.LICENSE + NOTICE + AUTHORS.rst + CHANGELOG.rst + CODE_OF_CONDUCT.rst + [options] -package_dir= +package_dir = =src -packages=find: +packages = find: include_package_data = true zip_safe = false + +python_requires = >=3.6.* + install_requires = attrs >= 18.1, !=20.1.0 - commoncode >= 21.5.25 + commoncode >= 30.2.0 plugincode >= 21.1.21 typecode >= 21.6.1 - patch >= 1.16 - -setup_requires = setuptools_scm[toml] >= 4 + six [options.packages.find] -where=src +where = src + [options.extras_require] full = @@ -73,15 +86,19 @@ full = extractcode_libarchive >= typecode[full] >= 21.6.1 +patch = + patch >= 1.16 + testing = - # upstream - pytest >= 6 + pytest >= 6, != 7.0.0 pytest-xdist >= 2 + aboutcode-toolkit >= 6.0.0 + black -docs= - Sphinx>=3.3.1 - 
sphinx-rtd-theme>=0.5.0 - doc8>=0.8.1 +docs = + Sphinx >= 3.3.1 + sphinx-rtd-theme >= 0.5.0 + doc8 >= 0.8.1 [options.entry_points] console-scripts = diff --git a/src/extractcode/archive.py b/src/extractcode/archive.py index 81135b3..d52399e 100644 --- a/src/extractcode/archive.py +++ b/src/extractcode/archive.py @@ -1194,5 +1194,11 @@ def try_to_extract(location, target_dir, extractor): QCOWHandler, VMDKHandler, VirtualBoxHandler, - PatchHandler, ] + +# only support extracting patches if patch is installed. This is not a default +try: + import patch as _pythonpatch + archive_handlers.append(PatchHandler) +except: + pass diff --git a/src/extractcode/patch.py b/src/extractcode/patch.py index 0225e45..3588695 100644 --- a/src/extractcode/patch.py +++ b/src/extractcode/patch.py @@ -11,8 +11,6 @@ import logging import os.path -import patch as pythonpatch - from commoncode import paths from commoncode import fileutils from commoncode import text @@ -136,6 +134,7 @@ def patch_info(location): Raise an exception if the file is not a patch file or cannot be parsed. """ + import patch as pythonpatch patchset = pythonpatch.fromfile(location) if not patchset: msg = 'Unable to parse patch file: %(location)s' % locals() diff --git a/tests/test_patch.py b/tests/test_patch.py index 5a70951..c3f4008 100644 --- a/tests/test_patch.py +++ b/tests/test_patch.py @@ -17,6 +17,12 @@ from extractcode import patch +try: + import patch as _pythonpatch +except ImportError: + import pytest + pytestmark = pytest.mark.skipif(True, reason="Run only if patch is installed.") + class TestIsPatch(FileBasedTesting): test_data_dir = os.path.join(os.path.dirname(__file__), 'data') diff --git a/tests/test_skeleton_codestyle.py b/tests/test_skeleton_codestyle.py new file mode 100644 index 0000000..2eb6e55 --- /dev/null +++ b/tests/test_skeleton_codestyle.py @@ -0,0 +1,36 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# ScanCode is a trademark of nexB Inc. 
+# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/skeleton for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import subprocess +import unittest +import configparser + + +class BaseTests(unittest.TestCase): + def test_skeleton_codestyle(self): + """ + This test shouldn't run in proliferated repositories. + """ + setup_cfg = configparser.ConfigParser() + setup_cfg.read("setup.cfg") + if setup_cfg["metadata"]["name"] != "skeleton": + return + + args = "venv/bin/black --check -l 100 setup.py etc tests" + try: + subprocess.check_output(args.split()) + except subprocess.CalledProcessError as e: + print("===========================================================") + print(e.output) + print("===========================================================") + raise Exception( + "Black style check failed; please format the code using:\n" + " python -m black -l 100 setup.py etc tests", + e.output, + ) from e