diff --git a/setup-mini.cfg b/setup-mini.cfg index b335a995f1..9d24ba15ba 100644 --- a/setup-mini.cfg +++ b/setup-mini.cfg @@ -150,7 +150,9 @@ packages = regipy >= 3.1.0; platform_system == 'Linux' packagedcode_msitools >= 0.101.210706; platform_system == 'Linux' go-inspector >= 0.5.0; platform_system == 'Linux' + rust-inspector >= 0.1.0; platform_system == 'Linux' + develop [options.entry_points] console_scripts = diff --git a/setup.cfg b/setup.cfg index 31269077d1..962943e75e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -150,7 +150,9 @@ packages = regipy >= 3.1.0; platform_system == 'Linux' packagedcode_msitools >= 0.101.210706; platform_system == 'Linux' go-inspector >= 0.5.0; platform_system == 'Linux' + support-OCI-labels rust-inspector >= 0.1.0; platform_system == 'Linux' + [options.entry_points] console_scripts = diff --git a/src/packagedcode/__init__.py b/src/packagedcode/__init__.py index 778d043492..168b987cac 100644 --- a/src/packagedcode/__init__.py +++ b/src/packagedcode/__init__.py @@ -18,6 +18,7 @@ from packagedcode import debian from packagedcode import debian_copyright from packagedcode import distro +from packagedcode import dockerfile from packagedcode import conda from packagedcode import conan from packagedcode import cocoapods @@ -95,6 +96,7 @@ debian.DebianSourcePackageTarballHandler, distro.EtcOsReleaseHandler, + dockerfile.DockerfileHandler, freebsd.CompactManifestHandler, diff --git a/src/packagedcode/dockerfile.py b/src/packagedcode/dockerfile.py new file mode 100644 index 0000000000..cc97bdc147 --- /dev/null +++ b/src/packagedcode/dockerfile.py @@ -0,0 +1,59 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# ScanCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/scancode-toolkit for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
class DockerfileHandler(models.DatafileHandler):
    """
    Handle Dockerfile and Containerfile manifests and report the OCI image
    labels (``org.opencontainers.image.*``) they declare as package data.
    """
    datasource_id = 'dockerfile_oci_labels'

    # Package type reported for the package data built from the labels.
    default_package_type = 'docker'

    # Prefix shared by the pre-defined OCI image annotation keys.
    oci_label_prefix = 'org.opencontainers.image.'

    # Lowercase patterns, matched against the lowercased base name of a path.
    filename_patterns = (
        'dockerfile',
        'containerfile',
        '*.dockerfile',
        '*.containerfile',
    )

    @classmethod
    def is_datafile(cls, path):
        """
        Return True if the file at ``path`` is named like a Dockerfile or a
        Containerfile. Only the base name is checked and case is ignored, such
        that ``Dockerfile``, ``Containerfile``, ``app.dockerfile`` and
        ``app.containerfile`` are all recognized on every platform.
        """
        filename = Path(path).name.lower()
        # fnmatchcase() does not apply the OS-specific case normalization of
        # fnmatch(), which keeps the results identical on all platforms.
        return any(
            fnmatch.fnmatchcase(filename, pattern)
            for pattern in cls.filename_patterns
        )

    @classmethod
    def parse(cls, location, package_only=False):
        """
        Yield one PackageData built from the OCI labels of the Dockerfile at
        ``location``.

        The ``title``, ``version``, ``description``, ``url`` and ``licenses``
        labels are mapped to the name, version, description, homepage URL and
        extracted license statement. All the OCI labels are also kept under
        the ``labels`` key of ``extra_data``. A label that is missing or empty
        is reported as None.
        """
        labels = cls.extract_oci_labels_from_dockerfile(location)
        package_data = dict(
            datasource_id=cls.datasource_id,
            type=cls.default_package_type,
            # "or None" also turns an empty label value into a missing one
            name=labels.get('title') or None,
            version=labels.get('version') or None,
            description=labels.get('description') or None,
            homepage_url=labels.get('url') or None,
            # NOTE(review): "labels" and "license_expression" are not
            # PackageData fields as far as can be told from here; the raw
            # values are carried in the generic fields below. Confirm against
            # the models module.
            extracted_license_statement=labels.get('licenses') or None,
            extra_data=dict(labels=labels) if labels else {},
        )

        yield models.PackageData.from_data(package_data, package_only)

    @classmethod
    def extract_oci_labels_from_dockerfile(cls, dockerfile_path):
        """
        Return a mapping of {short label name: value} for the OCI labels of
        the Dockerfile at ``dockerfile_path``, as reported by DockerfileParser.

        Only the labels with an ``org.opencontainers.image.`` prefix are
        returned and this prefix is removed from the keys: for instance the
        ``org.opencontainers.image.source`` label is returned as ``source``.
        Return an empty mapping if there is no such label.
        """
        with open(dockerfile_path, 'rb') as dockerfile:
            content = dockerfile.read()

        # Parse from an in-memory buffer: a DockerfileParser created without
        # arguments points to a "Dockerfile" in the current directory and
        # assigning its "content" writes that file to disk.
        parser = DockerfileParser(fileobj=io.BytesIO(content))

        prefix = cls.oci_label_prefix
        return {
            key[len(prefix):]: value
            for key, value in parser.labels.items()
            # skip non-OCI labels and a bare prefix without a label name
            if key.startswith(prefix) and len(key) > len(prefix)
        }
a/tests/packagedcode/data/docker/test-dockerfile-expected.json b/tests/packagedcode/data/docker/test-dockerfile-expected.json new file mode 100644 index 0000000000..2a843acee2 --- /dev/null +++ b/tests/packagedcode/data/docker/test-dockerfile-expected.json @@ -0,0 +1,18 @@ +[ + { + "datasource_id": "dockerfile_oci_labels", + "type": "default", + "name": "Kanboard", + "version": "1.2.42", + "license_expression": "MIT", + "labels": { + "source": "https://github.com/kanboard/kanboard", + "title": "Kanboard", + "description": "Kanboard is project management software that focuses on the Kanban methodology", + "vendor": "Kanboard", + "licenses": "MIT", + "url": "https://kanboard.org", + "documentation": "https://docs.kanboard.org" + } + } +] diff --git a/tests/packagedcode/data/docker/test.containerfile b/tests/packagedcode/data/docker/test.containerfile new file mode 100644 index 0000000000..2145e4476d --- /dev/null +++ b/tests/packagedcode/data/docker/test.containerfile @@ -0,0 +1,80 @@ +#Copied from https://github.com/kubernetes-sigs/blixt/blob + + +FROM rust:1.79-slim-bookworm as builder + +ARG TARGETARCH +ARG LLVM_VERSION=19 + +RUN apt-get update +RUN apt-get install --yes \ + build-essential \ + protobuf-compiler \ + pkg-config \ + musl-tools \ + clang \ + wget + +RUN apt install --yes lsb-release software-properties-common gnupg +RUN wget -O /tmp/llvm.sh https://apt.llvm.org/llvm.sh +RUN chmod +x /tmp/llvm.sh +RUN /bin/sh -c "/tmp/llvm.sh ${LLVM_VERSION} all" + +RUN rustup default stable +RUN rustup install nightly +RUN rustup component add rust-src --toolchain nightly +RUN --mount=type=cache,target=/root/.cargo/registry \ + cargo install bpf-linker + +WORKDIR /workspace +# Docker uses the amd64/arm64 convention while Rust uses the x86_64/aarch64 convention. +# Since Dockerfile doesn't support conditional variables (sigh), write the arch in Rust's +# convention to a file for later usage. 
+RUN if [ "$TARGETARCH" = "amd64" ]; \ + then echo "x86_64" >> arch; \ + else echo "aarch64" >> arch; \ + fi +RUN rustup target add $(eval cat arch)-unknown-linux-musl + +COPY dataplane dataplane +COPY tools/udp-test-server tools/udp-test-server +COPY xtask xtask +COPY Cargo.toml Cargo.toml +COPY Cargo.lock Cargo.lock +COPY .cargo .cargo + +# We need to tell bpf-linker where it can find LLVM's shared library file. +# Ref: https://github.com/aya-rs/rustc-llvm-proxy/blob/cbcb3c6/src/lib.rs#L48 +ENV LD_LIBRARY_PATH="/usr/lib/llvm-$LLVM_VERSION/lib" +ENV CC_aarch64_unknown_linux_musl="/usr/lib/llvm-$LLVM_VERSION/bin/clang" +ENV AR_aarch64_unknown_linux_musl="/usr/lib/llvm-$LLVM_VERSION/bin/llvm-ar" +ENV CC_x86_64_unknown_linux_musl="/usr/lib/llvm-$LLVM_VERSION/bin/clang" +ENV AR_x86_64_unknown_linux_musl="/usr/lib/llvm-$LLVM_VERSION/bin/llvm-ar" +ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_MUSL_RUSTFLAGS="-Clink-self-contained=yes -Clinker=rust-lld" + +RUN --mount=type=cache,target=/workspace/target/ \ + --mount=type=cache,target=/root/.cargo/registry \ + cargo xtask build-ebpf --release +RUN --mount=type=cache,target=/workspace/target/ \ + --mount=type=cache,target=/root/.cargo/registry \ + RUSTFLAGS=-Ctarget-feature=+crt-static cargo build \ + --workspace \ + --exclude ebpf \ + --release \ + --target=$(eval cat arch)-unknown-linux-musl +RUN --mount=type=cache,target=/workspace/target/ \ + cp /workspace/target/$(eval cat arch)-unknown-linux-musl/release/loader /workspace/dataplane-release + +FROM alpine + +LABEL org.opencontainers.image.source=https://github.com/kubernetes-sigs/blixt +LABEL org.opencontainers.image.licenses=GPL-2.0-only,BSD-2-Clause + +WORKDIR /opt/blixt/ + +COPY --from=builder /workspace/dataplane-release /opt/blixt/dataplane + +COPY dataplane/LICENSE.GPL-2.0 /opt/blixt/LICENSE.GPL-2.0 +COPY dataplane/LICENSE.BSD-2-Clause /opt/blixt/LICENSE.BSD-2-Clause + +ENTRYPOINT ["/opt/blixt/dataplane"] \ No newline at end of file diff --git 
a/tests/packagedcode/data/docker/test.dockerfile b/tests/packagedcode/data/docker/test.dockerfile new file mode 100644 index 0000000000..bb5efa2459 --- /dev/null +++ b/tests/packagedcode/data/docker/test.dockerfile @@ -0,0 +1,36 @@ +#Copied from https://github.com/kanboard/kanboard + +FROM alpine:3.21 + +LABEL org.opencontainers.image.source https://github.com/kanboard/kanboard +LABEL org.opencontainers.image.title=Kanboard +LABEL org.opencontainers.image.description="Kanboard is project management software that focuses on the Kanban methodology" +LABEL org.opencontainers.image.vendor=Kanboard +LABEL org.opencontainers.image.licenses=MIT +LABEL org.opencontainers.image.url=https://kanboard.org +LABEL org.opencontainers.image.documentation=https://docs.kanboard.org + +VOLUME /var/www/app/data +VOLUME /var/www/app/plugins +VOLUME /etc/nginx/ssl + +EXPOSE 80 443 + +ARG VERSION + +RUN apk --no-cache --update add \ + tzdata openssl unzip nginx bash ca-certificates s6 curl ssmtp mailx php83 php83-phar php83-curl \ + php83-fpm php83-json php83-zlib php83-xml php83-dom php83-ctype php83-opcache php83-zip php83-iconv \ + php83-pdo php83-pdo_mysql php83-pdo_sqlite php83-pdo_pgsql php83-mbstring php83-session php83-bcmath \ + php83-gd php83-openssl php83-sockets php83-posix php83-ldap php83-simplexml php83-xmlwriter && \ + rm -rf /var/www/localhost && \ + rm -f /etc/php83/php-fpm.d/www.conf && \ + ln -sf /usr/bin/php83 /usr/bin/php + +ADD . /var/www/app +ADD docker/ / + +RUN rm -rf /var/www/app/docker && echo $VERSION > /var/www/app/app/version.txt + +ENTRYPOINT ["/usr/local/bin/entrypoint.sh"] +CMD [] \ No newline at end of file diff --git a/tests/packagedcode/test_dockerfile.py b/tests/packagedcode/test_dockerfile.py new file mode 100644 index 0000000000..34353f056e --- /dev/null +++ b/tests/packagedcode/test_dockerfile.py @@ -0,0 +1,60 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# ScanCode is a trademark of nexB Inc. 
class TestDockerfileHandler:
    """
    Check Dockerfile detection, parsing and OCI label extraction using the
    test files stored in the ``data/docker`` directory next to this module.
    """

    # Each test Dockerfile name paired with the name of its expected JSON file.
    # The pairs are explicit because the expected names do not all derive from
    # the Dockerfile names.
    DOCKERFILES_AND_EXPECTED = (
        ('test.dockerfile', 'test-dockerfile-expected.json'),
        ('test.containerfile', 'containerfile-expected.json'),
        ('psql.dockerfile', 'psql-expected.json'),
    )

    def get_test_loc(self, path):
        """
        Return the Path of the test file at ``path``, a path relative to the
        directory that contains this test module.
        """
        return Path(os.path.dirname(__file__)) / path

    def load_expected(self, expected_file):
        """
        Return the data loaded from the JSON file at ``expected_file``.
        """
        with open(expected_file, encoding='utf-8') as f:
            return json.load(f)

    def test_is_datafile(self):
        for name, _expected_name in self.DOCKERFILES_AND_EXPECTED:
            test_file = self.get_test_loc(f'data/docker/{name}')
            # the name is the assertion message to tell which file failed
            assert DockerfileHandler.is_datafile(str(test_file)), name

    def test_parse_dockerfile(self):
        for name, expected_name in self.DOCKERFILES_AND_EXPECTED:
            test_file = self.get_test_loc(f'data/docker/{name}')
            expected_loc = self.get_test_loc(f'data/docker/{expected_name}')
            # parse() yields package data objects while the expected JSON
            # holds plain mappings: serialize before comparing.
            # NOTE(review): assumes the yielded objects have a to_dict()
            # method and that the expected files hold its full output;
            # confirm and regenerate the expected files as needed.
            packages = [
                package.to_dict()
                for package in DockerfileHandler.parse(str(test_file))
            ]
            expected_packages = self.load_expected(expected_loc)
            assert packages == expected_packages, name

    def test_extract_oci_labels_from_dockerfile(self):
        for name, expected_name in self.DOCKERFILES_AND_EXPECTED:
            dockerfile_path = self.get_test_loc(f'data/docker/{name}')
            expected_loc = self.get_test_loc(f'data/docker/{expected_name}')
            labels = DockerfileHandler.extract_oci_labels_from_dockerfile(
                str(dockerfile_path)
            )
            expected_labels = self.load_expected(expected_loc)[0]['labels']
            assert labels == expected_labels, name