Skip to content

Commit

Permalink
Update package assembly in conda installations
Browse files Browse the repository at this point in the history
Parse conda metadata JSON manifests and use the package data
and files information present to improve conda package assembly.

Reference: #4083
Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
  • Loading branch information
AyanSinhaMahapatra committed Jan 13, 2025
1 parent ce6251c commit a534fcf
Show file tree
Hide file tree
Showing 13 changed files with 1,237 additions and 3 deletions.
1 change: 1 addition & 0 deletions src/packagedcode/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
cocoapods.PodfileLockHandler,
cocoapods.PodfileHandler,

conda.CondaMetaJsonHandler,
conda.CondaMetaYamlHandler,
conda.CondaYamlHandler,

Expand Down
269 changes: 267 additions & 2 deletions src/packagedcode/conda.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#

import io
import json

import saneyaml
from packageurl import PackageURL
Expand All @@ -23,7 +24,267 @@
See https://repo.continuum.io/pkgs/free for examples.
"""

# TODO: there are likely other package data files for Conda

class CondaBaseHandler(models.DatafileHandler):
"""
Assemble package data and files present in conda manifests present in the
usual structure of a conda installation. Here the manifests which are
assembled together are:
- Conda metadata JSON (CondaMetaJsonHandler)
- Conda meta.yaml recipe (CondaMetaYamlHandler)
Example paths for these manifests:
/opt/conda/conda-meta/requests-2.32.3-py312h06a4308_1.json
/opt/conda/pkgs/requests-2.32.3-py312h06a4308_1/info/recipe/meta.yaml
"""

@classmethod
def assemble(cls, package_data, resource, codebase, package_adder=models.add_to_package):

if codebase.has_single_resource:
yield from models.DatafileHandler.assemble(package_data, resource, codebase, package_adder)
return

# We do not have any package data detected here
if not resource.package_data:
return

# If this is a Conda meta.yaml, try to find the corresponding metadata JSON
# and if present, run assembly on the metadata resource
if CondaMetaYamlHandler.is_datafile(resource.location):
conda_meta_json = cls.find_conda_meta_json_resource(resource, codebase)
if conda_meta_json:
package_data_meta_json, = conda_meta_json.package_data
yield from cls.assemble(
package_data=package_data_meta_json,
resource=conda_meta_json,
codebase=codebase,
package_adder=package_adder,
)

# corresponding metadata JSON does not exist, so handle this meta.yaml
elif package_data.purl:
package = models.Package.from_package_data(
package_data=package_data,
datafile_path=resource.path,
)
package.populate_license_fields()
yield package

CondaMetaYamlHandler.assign_package_to_resources(
package=package,
resource=resource,
codebase=codebase,
package_adder=package_adder,
)
yield resource

return

# For a conda metadata JSON, try to find the corresponding meta.yaml and
# assemble a single package out of these if it exists
conda_meta_yaml = cls.find_conda_meta_yaml_resource(resource, codebase)
if not package_data.purl:
yield resource
return

package = models.Package.from_package_data(
package_data=package_data,
datafile_path=resource.path,
)
if conda_meta_yaml:
conda_meta_yaml_package_data, = conda_meta_yaml.package_data
package.update(
package_data=conda_meta_yaml_package_data,
datafile_path=conda_meta_yaml.path,
)
cls.assign_package_to_resources(
package=package,
resource=conda_meta_yaml,
codebase=codebase,
package_adder=package_adder,
)
yield conda_meta_yaml

package.populate_license_fields()
yield package

cls.assign_package_to_resources(
package=package,
resource=resource,
codebase=codebase,
package_adder=package_adder,
)

# we yield this as we do not want this further processed
yield resource

# Get the file paths present in the metadata JSON and assign them to
# the package created from it
extracted_package_dir = package_data.extra_data.get('extracted_package_dir')
files = package_data.extra_data.get('files')

if not extracted_package_dir or not files:
return

conda_metadata_dir = resource.parent(codebase)
if not conda_metadata_dir:
return

conda_root_dir = conda_metadata_dir.parent(codebase)
if not conda_root_dir:
return

root_path_segment, _, package_dir = extracted_package_dir.rpartition("/pkgs/")
if not conda_root_dir.path.endswith(root_path_segment):
return

package_dir_path = f"{conda_root_dir.path}/pkgs/{package_dir}"
package_dir_resource = codebase.get_resource(path=package_dir_path)
if package_dir_resource:
cls.assign_package_to_resources(
package=package,
resource=package_dir_resource,
codebase=codebase,
package_adder=package_adder,
)

conda_package_path = f"{conda_root_dir.path}/pkgs/{package_dir}.conda"
conda_package_resource = codebase.get_resource(path=conda_package_path)
if conda_package_resource:
cls.assign_package_to_resources(
package=package,
resource=conda_package_resource,
codebase=codebase,
package_adder=package_adder,
)

for file_path in files:
full_file_path = f"{conda_root_dir.path}/{file_path}"
file_resource = codebase.get_resource(path=full_file_path)
if file_resource:
cls.assign_package_to_resources(
package=package,
resource=file_resource,
codebase=codebase,
package_adder=package_adder,
)

@classmethod
def check_valid_packages_dir_name(cls, package_dir_resource, resource, codebase):
"""
Return the name of the `package_dir_resource`, if it is valid, i.e.
the package (name, version) data present in `resource` matches the
directory name, and the package directory is present in it's usual
location in a conda installation.
"""
package_dir_parent = package_dir_resource.parent(codebase)

meta_yaml_package_data, = resource.package_data
name = meta_yaml_package_data.get("name")
version = meta_yaml_package_data.get("version")
if f"{name}-{version}" in package_dir_resource.name and (
package_dir_parent and "pkgs" in package_dir_parent.name
):
return package_dir_resource.name

@classmethod
def find_conda_meta_json_resource(cls, resource, codebase):
"""
Given a resource for a conda meta.yaml resource, find if it has any
corresponding metadata JSON located inside the conda-meta/ directory,
and return the resource if they exist, else return None.
"""
package_dir_resource = CondaMetaYamlHandler.get_conda_root(resource, codebase)
if not package_dir_resource or not resource.package_data:
return

package_dir_name = cls.check_valid_packages_dir_name(
package_dir_resource=package_dir_resource,
resource=resource,
codebase=codebase,
)
if not package_dir_name:
return

root_resource = package_dir_resource.parent(codebase).parent(codebase)
if not root_resource:
return

root_resource_path = root_resource.path
conda_meta_path = f"{root_resource_path}/conda-meta/{package_dir_name}.json"
conda_meta_resource = codebase.get_resource(path=conda_meta_path)

if conda_meta_resource and conda_meta_resource.package_data:
return conda_meta_resource

@classmethod
def find_conda_meta_yaml_resource(cls, resource, codebase):
"""
Given a resource for a metadata JSON located inside the conda-meta/
directory, find if it has any corresponding conda meta.yaml, and return
the resource if they exist, else return None.
"""
package_dir_name, _json, _ = resource.name.rpartition(".json")
parent_resource = resource.parent(codebase)
if not parent_resource and not parent_resource.name == "conda-meta":
return

root_resource = parent_resource.parent(codebase)
if not root_resource:
return

root_resource_path = root_resource.path
package_dir_path = f"{root_resource_path}/pkgs/{package_dir_name}/"
package_dir_resource = codebase.get_resource(path=package_dir_path)
if not package_dir_resource:
return

meta_yaml_path = f"{package_dir_path}info/recipe/meta.yaml"
meta_yaml_resource = codebase.get_resource(path=meta_yaml_path)
if meta_yaml_resource and meta_yaml_resource.package_data:
return meta_yaml_resource


class CondaMetaJsonHandler(CondaBaseHandler):
datasource_id = 'conda_meta_json'
path_patterns = ('*conda-meta/*.json',)
default_package_type = 'conda'
default_primary_language = 'Python'
description = 'Conda metadata JSON in rootfs'
documentation_url = 'https://docs.conda.io/'

@classmethod
def parse(cls, location, package_only=False):
with io.open(location, encoding='utf-8') as loc:
conda_metadata = json.load(loc)

name = conda_metadata.get('name')
version = conda_metadata.get('version')
extracted_license_statement = conda_metadata.get('license')
download_url = conda_metadata.get('url')

extra_data_fields = ['requested_spec', 'channel']
package_file_fields = ['extracted_package_dir', 'files', 'package_tarball_full_path']
other_package_fields = ['size', 'md5', 'sha256']

extra_data = {}
for metadata_field in extra_data_fields + package_file_fields:
extra_data[metadata_field] = conda_metadata.get(metadata_field)

package_data = dict(
datasource_id=cls.datasource_id,
type=cls.default_package_type,
name=name,
version=version,
extracted_license_statement=extracted_license_statement,
download_url=download_url,
extra_data=extra_data,
)
for package_field in other_package_fields:
package_data[package_field] = conda_metadata.get(package_field)
yield models.PackageData.from_data(package_data, package_only)


class CondaYamlHandler(BaseDependencyFileHandler):
datasource_id = 'conda_yaml'
Expand Down Expand Up @@ -55,7 +316,7 @@ def parse(cls, location, package_only=False):
yield models.PackageData.from_data(package_data, package_only)


class CondaMetaYamlHandler(models.DatafileHandler):
class CondaMetaYamlHandler(CondaBaseHandler):
datasource_id = 'conda_meta_yaml'
default_package_type = 'conda'
path_patterns = ('*/meta.yaml',)
Expand All @@ -67,13 +328,17 @@ def get_conda_root(cls, resource, codebase):
"""
Return a root Resource given a meta.yaml ``resource``.
"""
if not resource:
return

# the root is either the parent or further up for yaml stored under
# an "info" dir. We support extractcode extraction.
# in a source repo it would be in <repo>/conda.recipe/meta.yaml
paths = (
'info/recipe.tar-extract/recipe/meta.yaml',
'info/recipe/recipe/meta.yaml',
'conda.recipe/meta.yaml',
'info/recipe/meta.yaml',
)
res = resource
for pth in paths:
Expand Down
Loading

0 comments on commit a534fcf

Please sign in to comment.