Skip to content

Commit

Permalink
Merge pull request #174 from opossum-tool/feat-convert-scan-code
Browse files Browse the repository at this point in the history
feat: convert scan code to .opossum
  • Loading branch information
abraemer authored Jan 16, 2025
2 parents f7b6301 + 5dd967f commit 5054a60
Show file tree
Hide file tree
Showing 15 changed files with 48,589 additions and 28 deletions.
20 changes: 14 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,22 @@ Usage: uv run opossum-file generate [OPTIONS]

Currently supported input formats:
- SPDX
- ScanCode
- Opossum

Options:
--spdx PATH SPDX files used as input.
-o, --outfile TEXT The file path to write the generated opossum document
to. If appropriate, the extension ".opossum" will be
appended. [default: output.opossum]
--help Show this message and exit.

--spdx PATH Specify a path to a SPDX file that you would like to
include in the final output. Option can be repeated.
--opossum PATH Specify a path to a .opossum file that you would like
to include in the final output. Option can be
repeated.
--scan-code-json PATH Specify a path to a .json file generated by ScanCode
that you would like to include in the final output.
Option can be repeated.
-o, --outfile TEXT The file path to write the generated opossum document
to. If appropriate, the extension ".opossum" will be
appended. [default: output.opossum]
--help Show this message and exit.
```

# Development
Expand Down
51 changes: 40 additions & 11 deletions src/opossum_lib/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from opossum_lib.opossum.file_generation import write_opossum_information_to_file
from opossum_lib.opossum.opossum_file import OpossumInformation
from opossum_lib.opossum.read_opossum_file import read_opossum_file
from opossum_lib.scancode.convert_scancode_to_opossum import convert_scancode_to_opossum
from opossum_lib.spdx.convert_to_opossum import convert_spdx_to_opossum_information


Expand All @@ -25,13 +26,25 @@ def opossum_file() -> None:
@opossum_file.command()
@click.option(
"--spdx",
help="SPDX files used as input.",
"spdx_files",
help="Specify a path to a SPDX file that you would like to "
+ "include in the final output. Option can be repeated.",
multiple=True,
type=click.Path(exists=True),
)
@click.option(
"--opossum",
help="opossum files used as input.",
"opossum_files",
help="Specify a path to a .opossum file that you would like to "
+ "include in the final output. Option can be repeated.",
multiple=True,
type=click.Path(exists=True),
)
@click.option(
"--scan-code-json",
"scancode_json_files",
help="Specify a path to a .json file generated by ScanCode that you would like to "
+ "include in the final output. Option can be repeated.",
multiple=True,
type=click.Path(exists=True),
)
Expand All @@ -43,16 +56,25 @@ def opossum_file() -> None:
help="The file path to write the generated opossum document to. "
'If appropriate, the extension ".opossum" will be appended.',
)
def generate(spdx: list[str], opossum: list[str], outfile: str) -> None:
def generate(
spdx_files: list[str],
scancode_json_files: list[str],
opossum_files: list[str],
outfile: str,
) -> None:
"""
Generate an Opossum file from various other file formats.
\b
Currently supported input formats:
- SPDX
- ScanCode
- Opossum
"""
validate_input_exit_on_error(spdx, opossum)
opossum_information = convert_after_valid_input(spdx, opossum)
validate_input_and_exit_on_error(spdx_files, scancode_json_files, opossum_files)
opossum_information = convert_after_valid_input(
spdx_files, scancode_json_files, opossum_files
)

if not outfile.endswith(".opossum"):
outfile += ".opossum"
Expand All @@ -63,8 +85,12 @@ def generate(spdx: list[str], opossum: list[str], outfile: str) -> None:
write_opossum_information_to_file(opossum_information, Path(outfile))


def validate_input_exit_on_error(spdx: list[str], opossum: list[str]) -> None:
total_number_of_files = len(spdx) + len(opossum)
def validate_input_and_exit_on_error(
spdx_files: list[str], scancode_json_files: list[str], opossum_files: list[str]
) -> None:
total_number_of_files = (
len(spdx_files) + len(scancode_json_files) + len(opossum_files)
)
if total_number_of_files == 0:
logging.warning("No input provided. Exiting.")
sys.exit(1)
Expand All @@ -74,11 +100,14 @@ def validate_input_exit_on_error(spdx: list[str], opossum: list[str]) -> None:


def convert_after_valid_input(
spdx: list[str], opossum_files: list[str]
spdx_files: list[str], scancode_json_files: list[str], opossum_files: list[str]
) -> OpossumInformation:
if len(spdx) == 1:
the_spdx_file = spdx[0]
return convert_spdx_to_opossum_information(the_spdx_file)
if len(spdx_files) == 1:
spdx_input_file = spdx_files[0]
return convert_spdx_to_opossum_information(spdx_input_file)
elif len(scancode_json_files) == 1:
scancode_json_input_file = scancode_json_files[0]
return convert_scancode_to_opossum(scancode_json_input_file)
else:
opossum_input_file = opossum_files[0]
return read_opossum_file(opossum_input_file)
Expand Down
Empty file.
5 changes: 5 additions & 0 deletions src/opossum_lib/scancode/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# SPDX-FileCopyrightText: TNG Technology Consulting GmbH <https://www.tngtech.com>
#
# SPDX-License-Identifier: Apache-2.0

SCANCODE_SOURCE_NAME = "SC"
71 changes: 71 additions & 0 deletions src/opossum_lib/scancode/convert_scancode_to_opossum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# SPDX-FileCopyrightText: TNG Technology Consulting GmbH <https://www.tngtech.com>
#
# SPDX-License-Identifier: Apache-2.0


import json
import logging
import sys
import uuid

from opossum_lib.opossum.opossum_file import (
Metadata,
OpossumInformation,
)
from opossum_lib.scancode.model import Header, ScanCodeData
from opossum_lib.scancode.resource_tree import (
convert_to_opossum_resources,
create_attribution_mapping,
scancode_to_file_tree,
)


def convert_scancode_to_opossum(filename: str) -> OpossumInformation:
    """Build an ``OpossumInformation`` from a ScanCode JSON result file.

    The file is loaded via ``load_scancode_json``, turned into a file tree,
    and that tree is used to derive the resources as well as the attribution
    mapping. The single ScanCode header (see ``extract_scancode_header``)
    supplies the creation date of the generated metadata.

    Args:
        filename: Path of the ScanCode JSON file to convert.

    Returns:
        The populated ``OpossumInformation``; attribution breakpoints and
        external attribution sources are left empty.
    """
    logging.info(f"Converting scancode to opossum {filename}")

    parsed_scan = load_scancode_json(filename)

    tree = scancode_to_file_tree(parsed_scan)
    opossum_resources = convert_to_opossum_resources(tree)
    attribution_mapping = create_attribution_mapping(tree)
    attributions, attributions_by_resource = attribution_mapping

    # The header is only needed for the timestamp; the helper terminates the
    # process when the file does not carry exactly one header.
    header = extract_scancode_header(parsed_scan, filename)

    return OpossumInformation(
        metadata=Metadata(
            # Every conversion gets a fresh random project id.
            projectId=str(uuid.uuid4()),
            fileCreationDate=header.end_timestamp,
            projectTitle="ScanCode file",
        ),
        resources=opossum_resources,
        externalAttributions=attributions,
        resourcesToAttributions=attributions_by_resource,
        attributionBreakpoints=[],
        externalAttributionSources={},
    )


def load_scancode_json(filename: str) -> ScanCodeData:
    """Read a ScanCode JSON file and validate it into a ``ScanCodeData`` model.

    Args:
        filename: Path of the ScanCode JSON file to read.

    Returns:
        The model produced by ``ScanCodeData.model_validate`` for the parsed
        JSON document.

    Raises:
        SystemExit: With exit code 1 (after logging an error) when the file
            content is not valid JSON or cannot be decoded as UTF-8.

    Exceptions raised by ``ScanCodeData.model_validate`` are not handled here
    and propagate to the caller.
    """
    try:
        # Decode explicitly as UTF-8 instead of relying on the locale's
        # preferred encoding, which differs between platforms and would
        # otherwise mis-decode or reject non-ASCII content.
        with open(filename, encoding="utf-8") as inp:
            json_data = json.load(inp)
    except json.JSONDecodeError as e:
        logging.error(f"Error decoding json for file {filename}. Message: {e.msg}")
        sys.exit(1)
    except UnicodeDecodeError:
        logging.error(f"Error decoding json for file {filename}.")
        sys.exit(1)

    return ScanCodeData.model_validate(json_data)


def extract_scancode_header(scancode_data: ScanCodeData, filename: str) -> Header:
    """Return the only header of *scancode_data*.

    Args:
        scancode_data: Parsed ScanCode data whose ``headers`` list is inspected.
        filename: Path of the originating file; used in the error message only.

    Returns:
        The first (and only) element of ``scancode_data.headers``.

    Raises:
        SystemExit: With exit code 1 (after logging an error) when the number
            of headers is anything other than one.
    """
    headers = scancode_data.headers
    if len(headers) == 1:
        return headers[0]

    logging.error(f"Headers of ScanCode file are invalid. File: {filename}")
    sys.exit(1)
19 changes: 19 additions & 0 deletions src/opossum_lib/scancode/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# SPDX-FileCopyrightText: TNG Technology Consulting GmbH <https://www.tngtech.com>
#
# SPDX-License-Identifier: Apache-2.0


import os.path

from pydantic import BaseModel
from pydantic_core import SchemaValidator


def path_segments(path: str) -> list[str]:
    """Split *path* into its components after normalizing it.

    The path is first passed through ``os.path.normpath`` and the result is
    split on ``os.sep``.

    Args:
        path: The path to split.

    Returns:
        The list of segments of the normalized path.
    """
    return os.path.normpath(path).split(os.sep)


def check_schema(model: BaseModel) -> None:
    """Validate the current attribute values of *model* against its schema.

    A ``SchemaValidator`` is created from the model's
    ``__pydantic_core_schema__`` and run on the instance ``__dict__``.
    Nothing is returned; any exception raised by the validator propagates.

    Args:
        model: The model instance to check.
    """
    core_schema = model.__pydantic_core_schema__
    SchemaValidator(schema=core_schema).validate_python(model.__dict__)
157 changes: 157 additions & 0 deletions src/opossum_lib/scancode/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
# SPDX-FileCopyrightText: TNG Technology Consulting GmbH <https://www.tngtech.com>
#
# SPDX-License-Identifier: Apache-2.0

from __future__ import annotations

from enum import Enum
from typing import Any

from pydantic import BaseModel


class Options(BaseModel, extra="ignore"):
    """Model for the ``options`` object of a ScanCode ``Header``.

    Only ``input`` is declared; ``extra="ignore"`` makes the model drop any
    other keys of the options object.
    """

    # presumably the input paths the scan was run on — confirm against
    # ScanCode output
    input: list[str]


class SystemEnvironment(BaseModel):
    """Model for ``ExtraData.system_environment``.

    All fields are strings without defaults, i.e. each one must be present.
    """

    cpu_architecture: str
    operating_system: str
    platform: str
    platform_version: str
    python_version: str


class ExtraData(BaseModel):
    """Model for the ``extra_data`` object of a ScanCode ``Header``."""

    files_count: int
    spdx_license_list_version: str
    system_environment: SystemEnvironment


class Header(BaseModel):
    """Model for one entry of ``ScanCodeData.headers``.

    No field declares a default, so every key must be present in the input.
    """

    duration: float
    end_timestamp: str
    # Untyped list: the elements are not validated.
    errors: list
    extra_data: ExtraData
    # ``Any``: accepted as-is without type validation.
    message: Any
    notice: str
    options: Options
    output_format_version: str
    start_timestamp: str
    tool_name: str
    tool_version: str
    # Untyped list: the elements are not validated.
    warnings: list


class ReferenceMatch(BaseModel):
    """Model for one entry of ``GlobalLicenseDetection.reference_matches``.

    Declares the same fields as ``Match``.
    """

    end_line: int
    from_file: str
    license_expression: str
    license_expression_spdx: str
    matched_length: int
    matcher: str
    match_coverage: float
    rule_identifier: str
    rule_relevance: int
    # ``Any``: accepted as-is without type validation.
    rule_url: Any
    score: float
    start_line: int


class GlobalLicenseDetection(BaseModel):
    """Model for one entry of ``ScanCodeData.license_detections``."""

    detection_count: int
    identifier: str
    license_expression: str
    license_expression_spdx: str
    reference_matches: list[ReferenceMatch]


class Match(BaseModel):
    """Model for one entry of ``FileBasedLicenseDetection.matches``.

    Declares the same fields as ``ReferenceMatch``.
    """

    end_line: int
    from_file: str
    license_expression: str
    license_expression_spdx: str
    matched_length: int
    matcher: str
    match_coverage: float
    rule_identifier: str
    rule_relevance: int
    # ``Any``: accepted as-is without type validation.
    rule_url: Any
    score: float
    start_line: int


class FileBasedLicenseDetection(BaseModel):
    """Model for one entry of ``File.license_detections``."""

    license_expression: str
    license_expression_spdx: str
    matches: list[Match]
    identifier: str


class Copyright(BaseModel):
    """Model for one entry of ``File.copyrights``."""

    copyright: str
    # presumably the line span of the finding in the scanned file — confirm
    end_line: int
    start_line: int


class Holder(BaseModel):
    """Model for one entry of ``File.holders``."""

    # presumably the line span of the finding in the scanned file — confirm
    end_line: int
    holder: str
    start_line: int


class Url(BaseModel):
    """Model for one entry of ``File.urls``."""

    # presumably the line span of the finding in the scanned file — confirm
    end_line: int
    start_line: int
    url: str


class FileType(Enum):
    """Allowed values of ``File.type``."""

    FILE = "file"
    DIRECTORY = "directory"


class File(BaseModel):
    """Model for one entry of ``ScanCodeData.files``.

    No field declares a default, so every key must be present in the input;
    fields annotated ``... | None`` additionally accept ``None`` as a value.
    Fields annotated with a bare ``list`` do not validate their elements.
    """

    authors: list
    base_name: str
    copyrights: list[Copyright]
    date: str | None
    detected_license_expression: str | None
    detected_license_expression_spdx: str | None
    dirs_count: int
    emails: list
    extension: str
    files_count: int
    # Free-form string, distinct from ``type`` below, which is restricted to
    # the ``FileType`` values.
    file_type: str | None
    for_packages: list
    holders: list[Holder]
    is_archive: bool
    is_binary: bool
    is_media: bool
    is_script: bool
    is_source: bool
    is_text: bool
    license_clues: list
    license_detections: list[FileBasedLicenseDetection]
    md5: str | None
    mime_type: str | None
    name: str
    package_data: list
    path: str
    percentage_of_license_text: float
    programming_language: str | None
    scan_errors: list
    sha1: str | None
    sha256: str | None
    size: int
    size_count: int
    # Must be one of the ``FileType`` values ("file" or "directory").
    type: FileType
    urls: list[Url]


class ScanCodeData(BaseModel):
    """Top-level model of a parsed ScanCode JSON document.

    No field declares a default, so every key must be present in the input.
    """

    # Untyped list: the elements are not validated.
    dependencies: list
    files: list[File]
    license_detections: list[GlobalLicenseDetection]
    headers: list[Header]
    # Untyped list: the elements are not validated.
    packages: list
Loading

0 comments on commit 5054a60

Please sign in to comment.