Skip to content

Commit a534fcf

Browse files
Update package assembly in conda installations
Parse conda metadata JSON manifests and use the package data and files information present to improve conda package assembly. Reference: #4083 Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
1 parent ce6251c commit a534fcf

File tree

13 files changed

+1237
-3
lines changed

13 files changed

+1237
-3
lines changed

src/packagedcode/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@
7373
cocoapods.PodfileLockHandler,
7474
cocoapods.PodfileHandler,
7575

76+
conda.CondaMetaJsonHandler,
7677
conda.CondaMetaYamlHandler,
7778
conda.CondaYamlHandler,
7879

src/packagedcode/conda.py

Lines changed: 267 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#
99

1010
import io
11+
import json
1112

1213
import saneyaml
1314
from packageurl import PackageURL
@@ -23,7 +24,267 @@
2324
See https://repo.continuum.io/pkgs/free for examples.
2425
"""
2526

26-
# TODO: there are likely other package data files for Conda
27+
28+
class CondaBaseHandler(models.DatafileHandler):
    """
    Assemble package data and files present in conda manifests present in the
    usual structure of a conda installation. Here the manifests which are
    assembled together are:
    - Conda metadata JSON (CondaMetaJsonHandler)
    - Conda meta.yaml recipe (CondaMetaYamlHandler)

    Example paths for these manifests:
    /opt/conda/conda-meta/requests-2.32.3-py312h06a4308_1.json
    /opt/conda/pkgs/requests-2.32.3-py312h06a4308_1/info/recipe/meta.yaml
    """

    @classmethod
    def assemble(cls, package_data, resource, codebase, package_adder=models.add_to_package):
        """
        Yield a Package and the Resources handled while assembling the
        ``package_data`` detected in ``resource``.

        - With a single-resource codebase, delegate to the default
          ``models.DatafileHandler.assemble``.
        - For a meta.yaml ``resource``, assemble from the corresponding
          conda-meta JSON when one is found, otherwise from the meta.yaml alone
          (only when ``package_data`` has a purl).
        - For a conda-meta JSON ``resource``, create the Package from it, merge
          in the corresponding meta.yaml data when one is found, then assign the
          package directory, the ``.conda`` archive and the files listed in the
          JSON to the Package.

        ``package_adder`` is passed through unchanged to every
        ``assign_package_to_resources`` call.
        """
        if codebase.has_single_resource:
            yield from models.DatafileHandler.assemble(package_data, resource, codebase, package_adder)
            return

        # We do not have any package data detected here
        if not resource.package_data:
            return

        # If this is a Conda meta.yaml, try to find the corresponding metadata JSON
        # and if present, run assembly on the metadata resource
        if CondaMetaYamlHandler.is_datafile(resource.location):
            conda_meta_json = cls.find_conda_meta_json_resource(resource, codebase)
            if conda_meta_json:
                # exactly one package data entry is expected in a metadata JSON:
                # the single-element unpacking raises ValueError otherwise
                package_data_meta_json, = conda_meta_json.package_data
                # NOTE(review): entries of ``resource.package_data`` are read
                # with ``.get()`` in check_valid_packages_dir_name, while this
                # method reads ``package_data.purl`` as an attribute -- confirm
                # that the entry passed below supports attribute access.
                yield from cls.assemble(
                    package_data=package_data_meta_json,
                    resource=conda_meta_json,
                    codebase=codebase,
                    package_adder=package_adder,
                )

            # corresponding metadata JSON does not exist, so handle this meta.yaml
            elif package_data.purl:
                package = models.Package.from_package_data(
                    package_data=package_data,
                    datafile_path=resource.path,
                )
                package.populate_license_fields()
                yield package

                CondaMetaYamlHandler.assign_package_to_resources(
                    package=package,
                    resource=resource,
                    codebase=codebase,
                    package_adder=package_adder,
                )
                yield resource

            return

        # From here on, ``resource`` is handled as a conda metadata JSON.
        # Without a purl there is no package to create.
        if not package_data.purl:
            yield resource
            return

        # Try to find the corresponding meta.yaml and assemble a single package
        # out of these if it exists
        conda_meta_yaml = cls.find_conda_meta_yaml_resource(resource, codebase)

        package = models.Package.from_package_data(
            package_data=package_data,
            datafile_path=resource.path,
        )
        if conda_meta_yaml:
            conda_meta_yaml_package_data, = conda_meta_yaml.package_data
            package.update(
                package_data=conda_meta_yaml_package_data,
                datafile_path=conda_meta_yaml.path,
            )
            cls.assign_package_to_resources(
                package=package,
                resource=conda_meta_yaml,
                codebase=codebase,
                package_adder=package_adder,
            )
            yield conda_meta_yaml

        package.populate_license_fields()
        yield package

        cls.assign_package_to_resources(
            package=package,
            resource=resource,
            codebase=codebase,
            package_adder=package_adder,
        )

        # we yield this as we do not want this further processed
        yield resource

        cls._assign_installed_files(
            package=package,
            package_data=package_data,
            resource=resource,
            codebase=codebase,
            package_adder=package_adder,
        )

    @classmethod
    def _assign_installed_files(cls, package, package_data, resource, codebase, package_adder):
        """
        Assign to ``package`` the resources referenced by the metadata JSON
        ``resource``: the extracted package directory under ``pkgs/``, the
        ``.conda`` archive next to it, and every path of the ``files`` list.
        All paths are resolved relative to the grandparent directory of
        ``resource``. Do nothing if the needed data or directories are missing.
        """
        # Get the file paths present in the metadata JSON and assign them to
        # the package created from it
        extracted_package_dir = package_data.extra_data.get('extracted_package_dir')
        files = package_data.extra_data.get('files')

        if not extracted_package_dir or not files:
            return

        conda_metadata_dir = resource.parent(codebase)
        if not conda_metadata_dir:
            return

        conda_root_dir = conda_metadata_dir.parent(codebase)
        if not conda_root_dir:
            return

        # The recorded directory is a path from the original installation: only
        # trust it when its prefix before "/pkgs/" matches the end of the root
        # directory path found in this codebase.
        root_path_segment, _, package_dir = extracted_package_dir.rpartition("/pkgs/")
        if not conda_root_dir.path.endswith(root_path_segment):
            return

        package_dir_path = f"{conda_root_dir.path}/pkgs/{package_dir}"
        package_dir_resource = codebase.get_resource(path=package_dir_path)
        if package_dir_resource:
            cls.assign_package_to_resources(
                package=package,
                resource=package_dir_resource,
                codebase=codebase,
                package_adder=package_adder,
            )

        conda_package_path = f"{conda_root_dir.path}/pkgs/{package_dir}.conda"
        conda_package_resource = codebase.get_resource(path=conda_package_path)
        if conda_package_resource:
            cls.assign_package_to_resources(
                package=package,
                resource=conda_package_resource,
                codebase=codebase,
                package_adder=package_adder,
            )

        for file_path in files:
            full_file_path = f"{conda_root_dir.path}/{file_path}"
            file_resource = codebase.get_resource(path=full_file_path)
            if file_resource:
                cls.assign_package_to_resources(
                    package=package,
                    resource=file_resource,
                    codebase=codebase,
                    package_adder=package_adder,
                )

    @classmethod
    def check_valid_packages_dir_name(cls, package_dir_resource, resource, codebase):
        """
        Return the name of the `package_dir_resource`, if it is valid, i.e.
        the package (name, version) data present in `resource` matches the
        directory name, and the package directory is present in its usual
        location in a conda installation. Return None otherwise.
        """
        package_dir_parent = package_dir_resource.parent(codebase)

        # exactly one package data entry is expected for a meta.yaml
        meta_yaml_package_data, = resource.package_data
        name = meta_yaml_package_data.get("name")
        version = meta_yaml_package_data.get("version")
        # these are substring checks: the directory name carries extra segments
        # after "<name>-<version>" (see the example paths in the class docstring)
        if f"{name}-{version}" in package_dir_resource.name and (
            package_dir_parent and "pkgs" in package_dir_parent.name
        ):
            return package_dir_resource.name

    @classmethod
    def find_conda_meta_json_resource(cls, resource, codebase):
        """
        Given a resource for a conda meta.yaml resource, find if it has any
        corresponding metadata JSON located inside the conda-meta/ directory,
        and return the resource if they exist, else return None.
        """
        package_dir_resource = CondaMetaYamlHandler.get_conda_root(resource, codebase)
        if not package_dir_resource or not resource.package_data:
            return

        package_dir_name = cls.check_valid_packages_dir_name(
            package_dir_resource=package_dir_resource,
            resource=resource,
            codebase=codebase,
        )
        if not package_dir_name:
            return

        # walk up two levels: <root>/pkgs/<package dir>
        pkgs_dir_resource = package_dir_resource.parent(codebase)
        if not pkgs_dir_resource:
            return

        root_resource = pkgs_dir_resource.parent(codebase)
        if not root_resource:
            return

        root_resource_path = root_resource.path
        conda_meta_path = f"{root_resource_path}/conda-meta/{package_dir_name}.json"
        conda_meta_resource = codebase.get_resource(path=conda_meta_path)

        if conda_meta_resource and conda_meta_resource.package_data:
            return conda_meta_resource

    @classmethod
    def find_conda_meta_yaml_resource(cls, resource, codebase):
        """
        Given a resource for a metadata JSON located inside the conda-meta/
        directory, find if it has any corresponding conda meta.yaml, and return
        the resource if they exist, else return None.
        """
        package_dir_name, _json, _ = resource.name.rpartition(".json")
        parent_resource = resource.parent(codebase)
        # Bail out when there is no parent OR the parent is not the conda-meta
        # directory. This must be an ``or``: with ``and`` a missing parent was
        # dereferenced and an existing parent was never checked by name.
        if not parent_resource or parent_resource.name != "conda-meta":
            return

        root_resource = parent_resource.parent(codebase)
        if not root_resource:
            return

        root_resource_path = root_resource.path
        package_dir_path = f"{root_resource_path}/pkgs/{package_dir_name}/"
        package_dir_resource = codebase.get_resource(path=package_dir_path)
        if not package_dir_resource:
            return

        meta_yaml_path = f"{package_dir_path}info/recipe/meta.yaml"
        meta_yaml_resource = codebase.get_resource(path=meta_yaml_path)
        if meta_yaml_resource and meta_yaml_resource.package_data:
            return meta_yaml_resource
247+
248+
249+
class CondaMetaJsonHandler(CondaBaseHandler):
    """
    Handle the JSON metadata files matched by the ``*conda-meta/*.json`` path
    pattern.
    """
    datasource_id = 'conda_meta_json'
    path_patterns = ('*conda-meta/*.json',)
    default_package_type = 'conda'
    default_primary_language = 'Python'
    description = 'Conda metadata JSON in rootfs'
    documentation_url = 'https://docs.conda.io/'

    @classmethod
    def parse(cls, location, package_only=False):
        """
        Yield a single PackageData built from the conda metadata JSON file at
        ``location``. ``package_only`` is passed as-is to
        ``models.PackageData.from_data``.
        """
        with io.open(location, encoding='utf-8') as metadata_file:
            metadata = json.load(metadata_file)

        # Fields stored under extra_data: the first two describe how the
        # package was requested, the rest are read back during assembly to
        # locate the package directory and files.
        extra_data_keys = (
            'requested_spec',
            'channel',
            'extracted_package_dir',
            'files',
            'package_tarball_full_path',
        )
        # a missing field is stored with a None value
        extra_data = {key: metadata.get(key) for key in extra_data_keys}

        package_mapping = {
            'datasource_id': cls.datasource_id,
            'type': cls.default_package_type,
            'name': metadata.get('name'),
            'version': metadata.get('version'),
            'extracted_license_statement': metadata.get('license'),
            'download_url': metadata.get('url'),
            'extra_data': extra_data,
        }

        # Fields copied under the same name from the JSON to the package data
        for same_name_key in ('size', 'md5', 'sha256'):
            package_mapping[same_name_key] = metadata.get(same_name_key)

        yield models.PackageData.from_data(package_mapping, package_only)
287+
27288

28289
class CondaYamlHandler(BaseDependencyFileHandler):
29290
datasource_id = 'conda_yaml'
@@ -55,7 +316,7 @@ def parse(cls, location, package_only=False):
55316
yield models.PackageData.from_data(package_data, package_only)
56317

57318

58-
class CondaMetaYamlHandler(models.DatafileHandler):
319+
class CondaMetaYamlHandler(CondaBaseHandler):
59320
datasource_id = 'conda_meta_yaml'
60321
default_package_type = 'conda'
61322
path_patterns = ('*/meta.yaml',)
@@ -67,13 +328,17 @@ def get_conda_root(cls, resource, codebase):
67328
"""
68329
Return a root Resource given a meta.yaml ``resource``.
69330
"""
331+
if not resource:
332+
return
333+
70334
# the root is either the parent or further up for yaml stored under
71335
# an "info" dir. We support extractcode extraction.
72336
# in a source repo it would be in <repo>/conda.recipe/meta.yaml
73337
paths = (
74338
'info/recipe.tar-extract/recipe/meta.yaml',
75339
'info/recipe/recipe/meta.yaml',
76340
'conda.recipe/meta.yaml',
341+
'info/recipe/meta.yaml',
77342
)
78343
res = resource
79344
for pth in paths:

0 commit comments

Comments
 (0)