class DockerOCILabelsHandler(NonAssemblableDatafileHandler):
    """
    Handle Dockerfile/Containerfile manifests and report the OCI annotations
    declared with ``LABEL`` instructions as package data.
    """
    datasource_id = 'dockerfile_oci_labels'
    default_package_type = 'docker'
    # The original patterns are kept as-is for backward compatibility.
    # NOTE(review): other handlers presumably match these patterns against the
    # full POSIX path, hence the additional "*/"-prefixed variants -- confirm
    # against DatafileHandler.is_datafile().
    path_patterns = (
        'Dockerfile',
        'containerfile',
        '*.dockerfile',
        '*.containerfile',
        '*/Dockerfile',
        '*/Containerfile',
        '*/containerfile',
    )
    description = 'Dockerfile or Containerfile with OCI image labels'
    documentation_url = 'https://github.com/opencontainers/image-spec/blob/main/annotations.md'

    @classmethod
    def parse(cls, location, package_only=False):
        """
        Yield one PackageData built from the OCI labels of the Dockerfile at
        ``location``.

        The ``title``, ``version`` and ``licenses`` labels feed the package
        name, version and extracted license statement; every label found is
        also kept verbatim under ``extra_data['labels']``.
        """
        labels = cls.extract_oci_labels_from_dockerfile(location)

        # Normalize a missing, empty or whitespace-only label to None.
        license_value = (labels.get('org.opencontainers.image.licenses') or '').strip() or None
        version = (labels.get('org.opencontainers.image.version') or '').strip() or None

        package_data = {
            'datasource_id': cls.datasource_id,
            'type': cls.default_package_type,
            'name': labels.get('org.opencontainers.image.title'),
            'version': version,
            'extracted_license_statement': license_value,
            'extra_data': {'labels': labels},
        }

        yield models.PackageData.from_data(package_data, package_only)

    @classmethod
    def extract_oci_labels_from_dockerfile(cls, dockerfile_path):
        """
        Return a plain dict of the labels declared in the Dockerfile at
        ``dockerfile_path``, in declaration order.
        """
        # Read raw bytes and let the parser decode them, rather than relying
        # on the platform default text encoding.
        with open(dockerfile_path, 'rb') as dockerfile:
            content = dockerfile.read()

        # Parse from an in-memory file object. A path-less DockerfileParser()
        # targets "./Dockerfile", and assigning to its ``content`` writes that
        # file: scanning must never create or overwrite files in the current
        # working directory.
        parser = DockerfileParser(fileobj=io.BytesIO(content))

        # Copy into a plain dict so the result is detached from the parser.
        return dict(parser.labels)
a/tests/packagedcode/data/docker/test.containerfile-package.expected.json b/tests/packagedcode/data/docker/test.containerfile-package.expected.json new file mode 100644 index 0000000000..75be712fa2 --- /dev/null +++ b/tests/packagedcode/data/docker/test.containerfile-package.expected.json @@ -0,0 +1,17 @@ +[{ + "datasource_id": "dockerfile_oci_labels", + "type": "docker", + "namespace": null, + "name": null, + "version": null, + "qualifiers": null, + "subpath": null, + "extracted_license_statement": "GPL-2.0-only,BSD-2-Clause", + "primary_language": null, + "extra_data": { + "labels": { + "org.opencontainers.image.source": "https://github.com/kubernetes-sigs/blixt", + "org.opencontainers.image.licenses": "GPL-2.0-only,BSD-2-Clause" + } + } +}] \ No newline at end of file diff --git a/tests/packagedcode/data/docker/test.containerfile-scan.expected.json b/tests/packagedcode/data/docker/test.containerfile-scan.expected.json new file mode 100644 index 0000000000..7435523fcb --- /dev/null +++ b/tests/packagedcode/data/docker/test.containerfile-scan.expected.json @@ -0,0 +1,43 @@ +[{ + "datasource_id": "dockerfile_oci_labels", + "type": "docker", + "namespace": null, + "name": null, + "version": null, + "qualifiers": null, + "subpath": null, + "extracted_license_statement": "GPL-2.0-only,BSD-2-Clause", + "declared_license_expression": "gpl-2.0 AND bsd-2-clause", + "declared_license_expression_spdx": "GPL-2.0 AND BSD-2-Clause", + "license_detections": [ + { + "license_expression": "gpl-2.0", + "matches": [{ + "license_expression": "gpl-2.0", + "start_line": 1, + "end_line": 1, + "from_file": "tests/packagedcode/data/docker/test.containerfile", + "score": 100.0, + "matched_text": "GPL-2.0-only" + }] + }, + { + "license_expression": "bsd-2-clause", + "matches": [{ + "license_expression": "bsd-2-clause", + "start_line": 1, + "end_line": 1, + "from_file": "tests/packagedcode/data/docker/test.containerfile", + "score": 100.0, + "matched_text": "BSD-2-Clause" + }] + } + ], + 
"primary_language": null, + "extra_data": { + "labels": { + "org.opencontainers.image.source": "https://github.com/kubernetes-sigs/blixt", + "org.opencontainers.image.licenses": "GPL-2.0-only,BSD-2-Clause" + } + } +}] \ No newline at end of file diff --git a/tests/packagedcode/data/docker/test.dockerfile b/tests/packagedcode/data/docker/test.dockerfile new file mode 100644 index 0000000000..edbcbe058c --- /dev/null +++ b/tests/packagedcode/data/docker/test.dockerfile @@ -0,0 +1,21 @@ +#Copied from https://github.com/kanboard/kanboard + +FROM alpine:3.21 +LABEL org.opencontainers.image.source=https://github.com/kanboard/kanboard +LABEL org.opencontainers.image.title=Kanboard +LABEL org.opencontainers.image.description="Kanboard is project management software that focuses on the Kanban methodology" +LABEL org.opencontainers.image.vendor=Kanboard +LABEL org.opencontainers.image.licenses=MIT +LABEL org.opencontainers.image.url=https://kanboard.org +LABEL org.opencontainers.image.documentation=https://docs.kanboard.org +VOLUME /var/www/app/data +VOLUME /var/www/app/plugins +VOLUME /etc/nginx/ssl +EXPOSE 80 443 +ARG VERSION +RUN apk --no-cache --update add ... +ADD . 
/var/www/app +ADD docker/ / +RUN rm -rf /var/www/app/docker && echo $VERSION > /var/www/app/app/version.txt +ENTRYPOINT ["/usr/local/bin/entrypoint.sh"] +CMD [] \ No newline at end of file diff --git a/tests/packagedcode/data/docker/test.dockerfile-expected.json b/tests/packagedcode/data/docker/test.dockerfile-expected.json new file mode 100644 index 0000000000..26e97c47ab --- /dev/null +++ b/tests/packagedcode/data/docker/test.dockerfile-expected.json @@ -0,0 +1,9 @@ +{ + "org.opencontainers.image.source": "https://github.com/kanboard/kanboard", + "org.opencontainers.image.title": "Kanboard", + "org.opencontainers.image.description": "Kanboard is project management software that focuses on the Kanban methodology", + "org.opencontainers.image.vendor": "Kanboard", + "org.opencontainers.image.licenses": "MIT", + "org.opencontainers.image.url": "https://kanboard.org", + "org.opencontainers.image.documentation": "https://docs.kanboard.org" +} \ No newline at end of file diff --git a/tests/packagedcode/data/docker/test.dockerfile-package.expected.json b/tests/packagedcode/data/docker/test.dockerfile-package.expected.json new file mode 100644 index 0000000000..bbcc928751 --- /dev/null +++ b/tests/packagedcode/data/docker/test.dockerfile-package.expected.json @@ -0,0 +1,23 @@ +[{ + "datasource_id": "dockerfile_oci_labels", + "type": "docker", + "namespace": null, + "name": "Kanboard", + "version": null, + "qualifiers": null, + "subpath": null, + "extracted_license_statement": "MIT", + "primary_language": null, + "extra_data": { + "labels": { + "org.opencontainers.image.source": "https://github.com/kanboard/kanboard", + "org.opencontainers.image.title": "Kanboard", + "org.opencontainers.image.description": "Kanboard is project management software that focuses on the Kanban methodology", + "org.opencontainers.image.vendor": "Kanboard", + "org.opencontainers.image.licenses": "MIT", + "org.opencontainers.image.url": "https://kanboard.org", + 
"org.opencontainers.image.documentation": "https://docs.kanboard.org" + } + }, + "purl": "pkg:docker/Kanboard" +}] \ No newline at end of file diff --git a/tests/packagedcode/data/docker/test.dockerfile-scan.expected.json b/tests/packagedcode/data/docker/test.dockerfile-scan.expected.json new file mode 100644 index 0000000000..ac926f173a --- /dev/null +++ b/tests/packagedcode/data/docker/test.dockerfile-scan.expected.json @@ -0,0 +1,36 @@ +[{ + "datasource_id": "dockerfile_oci_labels", + "type": "docker", + "namespace": null, + "name": "Kanboard", + "version": null, + "qualifiers": null, + "subpath": null, + "extracted_license_statement": "MIT", + "declared_license_expression": "mit", + "declared_license_expression_spdx": "MIT", + "license_detections": [{ + "license_expression": "mit", + "matches": [{ + "license_expression": "mit", + "start_line": 1, + "end_line": 1, + "from_file": "tests/packagedcode/data/docker/test.dockerfile", + "score": 100.0, + "matched_text": "MIT" + }] + }], + "primary_language": null, + "extra_data": { + "labels": { + "org.opencontainers.image.source": "https://github.com/kanboard/kanboard", + "org.opencontainers.image.title": "Kanboard", + "org.opencontainers.image.description": "Kanboard is project management software that focuses on the Kanban methodology", + "org.opencontainers.image.vendor": "Kanboard", + "org.opencontainers.image.licenses": "MIT", + "org.opencontainers.image.url": "https://kanboard.org", + "org.opencontainers.image.documentation": "https://docs.kanboard.org" + } + }, + "purl": "pkg:docker/Kanboard" +}] \ No newline at end of file diff --git a/tests/packagedcode/test_dockerfile_ocilabels.py b/tests/packagedcode/test_dockerfile_ocilabels.py new file mode 100644 index 0000000000..d71e5d3ee4 --- /dev/null +++ b/tests/packagedcode/test_dockerfile_ocilabels.py @@ -0,0 +1,65 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# ScanCode is a trademark of nexB Inc. 
class TestDockerOCILabelsHandler(FileDrivenTesting):
    """
    Tests for DockerOCILabelsHandler: datafile recognition, label extraction,
    package data parsing and a full ``--package`` scan.
    """
    test_data_dir = os.path.join(os.path.dirname(__file__), 'data')

    @pytest.mark.parametrize('test_file, expected', [
        ('docker/test.dockerfile', True),
        ('docker/test.containerfile', True),
    ])
    def test_is_datafile(self, test_file, expected):
        test_file_path = self.get_test_loc(test_file)
        assert DockerOCILabelsHandler.is_datafile(test_file_path) == expected

    def test_parse_dockerfile(self):
        test_files = [
            ('test.dockerfile', 'test.dockerfile-package.expected.json'),
            ('test.containerfile', 'test.containerfile-package.expected.json'),
        ]
        for dockerfile, expected in test_files:
            test_file = self.get_test_loc(f'docker/{dockerfile}')
            expected_loc = self.get_test_loc(f'docker/{expected}')
            packages = list(DockerOCILabelsHandler.parse(test_file))
            expected_packages = self.load_expected(expected_loc)
            # NOTE(review): parse() yields whatever PackageData.from_data()
            # returns while the fixture is a list of plain dicts -- confirm
            # this equality can hold, or compare serialized package data.
            assert packages == expected_packages

    def test_extract_oci_labels_from_dockerfile(self):
        test_files = [
            ('test.dockerfile', 'test.dockerfile-expected.json'),
            ('test.containerfile', 'test.containerfile-expected.json'),
        ]
        for dockerfile, expected in test_files:
            dockerfile_path = self.get_test_loc(f'docker/{dockerfile}')
            labels = DockerOCILabelsHandler.extract_oci_labels_from_dockerfile(dockerfile_path)
            expected_loc = self.get_test_loc(f'docker/{expected}')
            expected_labels = self.load_expected(expected_loc)
            assert labels == expected_labels

    def test_full_scan_docker_oci_labels_containerfile(self):
        test_file = self.get_test_loc('docker/test.containerfile')
        result_file = self.get_temp_file('json')
        run_scan_click(['--package', test_file, '--json-pp', result_file])

        # Load both JSON documents through the helper so that the file
        # handles are always closed.
        result = self.load_expected(result_file)
        expected_loc = self.get_test_loc('docker/test.containerfile-scan.expected.json')
        expected_package_data = self.load_expected(expected_loc)

        # NOTE(review): this reads "package_data" from the top level of the
        # scan output -- confirm that is where the scan JSON exposes it.
        package_data = result.get('package_data', [])
        assert len(package_data) == 1
        assert package_data == expected_package_data

    def load_expected(self, expected_file):
        """
        Return the parsed content of the JSON file at ``expected_file``.
        """
        with open(expected_file, encoding='utf-8') as f:
            return json.load(f)