-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #174 from opossum-tool/feat-convert-scan-code
feat: convert scan code to .opossum
- Loading branch information
Showing 15 changed files with 48,589 additions and 28 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# SPDX-FileCopyrightText: TNG Technology Consulting GmbH <https://www.tngtech.com> | ||
# | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
# Short identifier for ScanCode as a source.
# NOTE(review): presumably used as the source name on attributions created
# from ScanCode data - confirm against the callers.
SCANCODE_SOURCE_NAME = "SC"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
# SPDX-FileCopyrightText: TNG Technology Consulting GmbH <https://www.tngtech.com> | ||
# | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
|
||
import json | ||
import logging | ||
import sys | ||
import uuid | ||
|
||
from opossum_lib.opossum.opossum_file import ( | ||
Metadata, | ||
OpossumInformation, | ||
) | ||
from opossum_lib.scancode.model import Header, ScanCodeData | ||
from opossum_lib.scancode.resource_tree import ( | ||
convert_to_opossum_resources, | ||
create_attribution_mapping, | ||
scancode_to_file_tree, | ||
) | ||
|
||
|
||
def convert_scancode_to_opossum(filename: str) -> OpossumInformation:
    """Convert a ScanCode JSON result file into an ``OpossumInformation`` object.

    The file is loaded with ``load_scancode_json``, turned into a file tree,
    and that tree is used to derive both the opossum resources and the
    attribution data. The metadata gets a freshly generated random project
    id, the fixed title ``"ScanCode file"``, and the ScanCode header's
    ``end_timestamp`` as its creation date.

    Args:
        filename: Path of the ScanCode JSON file to convert.

    Returns:
        The assembled ``OpossumInformation``; attribution breakpoints and
        external attribution sources are left empty.

    Raises:
        SystemExit: Raised by ``load_scancode_json`` or
            ``extract_scancode_header`` when the file cannot be decoded or
            does not contain exactly one header.
    """
    logging.info(f"Converting scancode to opossum {filename}")

    scan = load_scancode_json(filename)

    tree = scancode_to_file_tree(scan)
    opossum_resources = convert_to_opossum_resources(tree)
    attributions, attribution_mapping = create_attribution_mapping(tree)

    # The header is only read after the tree has been built, so a broken
    # header is reported after any tree-building problems.
    header = extract_scancode_header(scan, filename)

    return OpossumInformation(
        metadata=Metadata(
            projectId=str(uuid.uuid4()),
            fileCreationDate=header.end_timestamp,
            projectTitle="ScanCode file",
        ),
        resources=opossum_resources,
        externalAttributions=attributions,
        resourcesToAttributions=attribution_mapping,
        attributionBreakpoints=[],
        externalAttributionSources={},
    )
|
||
|
||
def load_scancode_json(filename: str) -> ScanCodeData:
    """Read a ScanCode JSON file and validate it into a ``ScanCodeData`` model.

    The file is always decoded as UTF-8, independent of the platform's
    locale settings.

    Args:
        filename: Path of the ScanCode JSON file to read.

    Returns:
        The result of ``ScanCodeData.model_validate`` applied to the parsed
        JSON document.

    Raises:
        SystemExit: With status 1, after logging an error, when the file
            content is not valid JSON or cannot be decoded as UTF-8.

    Errors from opening the file and errors raised by ``model_validate`` are
    not handled here and propagate to the caller.
    """
    try:
        # Pin the encoding: without it ``open`` falls back to the locale's
        # preferred encoding, which is not UTF-8 on every platform.
        with open(filename, encoding="utf-8") as inp:
            json_data = json.load(inp)
    except json.JSONDecodeError as e:
        logging.error(f"Error decoding json for file {filename}. Message: {e.msg}")
        sys.exit(1)
    except UnicodeDecodeError:
        logging.error(f"Error decoding json for file {filename}.")
        sys.exit(1)

    return ScanCodeData.model_validate(json_data)
|
||
|
||
def extract_scancode_header(scancode_data: ScanCodeData, filename: str) -> Header:
    """Return the single header of ``scancode_data``.

    Args:
        scancode_data: Parsed ScanCode data whose ``headers`` list is inspected.
        filename: Name of the originating file; used only in the error message.

    Returns:
        The only element of ``scancode_data.headers``.

    Raises:
        SystemExit: With status 1, after logging an error, when the number
            of headers is anything other than one.
    """
    headers = scancode_data.headers
    if len(headers) == 1:
        return headers[0]

    logging.error(f"Headers of ScanCode file are invalid. File: {filename}")
    sys.exit(1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# SPDX-FileCopyrightText: TNG Technology Consulting GmbH <https://www.tngtech.com> | ||
# | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
|
||
import os.path | ||
|
||
from pydantic import BaseModel | ||
from pydantic_core import SchemaValidator | ||
|
||
|
||
def path_segments(path: str) -> list[str]:
    """Split ``path`` into its components after normalising it.

    The path is passed through ``os.path.normpath`` first, then split on the
    platform's separator ``os.sep``.

    Args:
        path: The path to split.

    Returns:
        The list of segments of the normalised path. An empty input
        normalises to ``"."`` and therefore yields ``["."]``.
    """
    return os.path.normpath(path).split(os.sep)
|
||
|
||
def check_schema(model: BaseModel) -> None:
    """Validate the current attribute values of ``model`` against its own schema.

    A ``SchemaValidator`` is built from the model's
    ``__pydantic_core_schema__`` and run over ``model.__dict__``. The
    validation result is discarded; the function returns ``None``.

    Args:
        model: The pydantic model instance to check.

    NOTE(review): any error raised by ``validate_python`` propagates to the
    caller - presumably a pydantic validation error; confirm against the
    pydantic-core documentation.
    """
    SchemaValidator(schema=model.__pydantic_core_schema__).validate_python(
        model.__dict__
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,157 @@ | ||
# SPDX-FileCopyrightText: TNG Technology Consulting GmbH <https://www.tngtech.com> | ||
# | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
from __future__ import annotations | ||
|
||
from enum import Enum | ||
from typing import Any | ||
|
||
from pydantic import BaseModel | ||
|
||
|
||
class Options(BaseModel, extra="ignore"):
    """Model for the ``options`` field of ``Header``.

    Only ``input`` is declared; the class is configured with
    ``extra="ignore"``, so keys other than ``input`` are not modelled.
    """

    input: list[str]
|
||
|
||
class SystemEnvironment(BaseModel):
    """Model for the ``system_environment`` field of ``ExtraData``.

    Every field is declared as a plain string.
    """

    cpu_architecture: str
    operating_system: str
    platform: str
    platform_version: str
    python_version: str
|
||
|
||
class ExtraData(BaseModel):
    """Model for the ``extra_data`` field of ``Header``."""

    files_count: int
    spdx_license_list_version: str
    system_environment: SystemEnvironment
|
||
|
||
class Header(BaseModel):
    """Model for one entry of ``ScanCodeData.headers``.

    Timestamps are kept as plain strings; no date parsing is declared here.
    """

    duration: float
    end_timestamp: str
    errors: list  # element type is not constrained
    extra_data: ExtraData
    message: Any  # accepts any value
    notice: str
    options: Options
    output_format_version: str
    start_timestamp: str
    tool_name: str
    tool_version: str
    warnings: list  # element type is not constrained
|
||
|
||
class ReferenceMatch(BaseModel):
    """Model for one entry of ``GlobalLicenseDetection.reference_matches``.

    Declares the same set of fields as ``Match``.
    """

    end_line: int
    from_file: str
    license_expression: str
    license_expression_spdx: str
    matched_length: int
    matcher: str
    match_coverage: float
    rule_identifier: str
    rule_relevance: int
    rule_url: Any  # accepts any value
    score: float
    start_line: int
|
||
|
||
class GlobalLicenseDetection(BaseModel):
    """Model for one entry of ``ScanCodeData.license_detections``."""

    detection_count: int
    identifier: str
    license_expression: str
    license_expression_spdx: str
    reference_matches: list[ReferenceMatch]
|
||
|
||
class Match(BaseModel):
    """Model for one entry of ``FileBasedLicenseDetection.matches``.

    Declares the same set of fields as ``ReferenceMatch``.
    """

    end_line: int
    from_file: str
    license_expression: str
    license_expression_spdx: str
    matched_length: int
    matcher: str
    match_coverage: float
    rule_identifier: str
    rule_relevance: int
    rule_url: Any  # accepts any value
    score: float
    start_line: int
|
||
|
||
class FileBasedLicenseDetection(BaseModel):
    """Model for one entry of ``File.license_detections``."""

    license_expression: str
    license_expression_spdx: str
    matches: list[Match]
    identifier: str
|
||
|
||
class Copyright(BaseModel):
    """Model for one entry of ``File.copyrights``: a text plus a line range."""

    copyright: str
    end_line: int
    start_line: int
|
||
|
||
class Holder(BaseModel):
    """Model for one entry of ``File.holders``: a holder name plus a line range."""

    end_line: int
    holder: str
    start_line: int
|
||
|
||
class Url(BaseModel):
    """Model for one entry of ``File.urls``: a URL string plus a line range."""

    end_line: int
    start_line: int
    url: str
|
||
|
||
class FileType(Enum):
    """Allowed values for ``File.type``: ``"file"`` or ``"directory"``."""

    FILE = "file"
    DIRECTORY = "directory"
|
||
|
||
class File(BaseModel):
    """Model for one entry of ``ScanCodeData.files``.

    Several fields are bare ``list`` annotations, so their element types are
    not constrained. No field declares a default value.

    NOTE(review): fields typed ``... | None`` accept ``None`` but, having no
    default, presumably still have to be present in the input - confirm
    against pydantic's required-field rules.
    """

    authors: list
    base_name: str
    copyrights: list[Copyright]
    date: str | None
    detected_license_expression: str | None
    detected_license_expression_spdx: str | None
    dirs_count: int
    emails: list
    extension: str
    files_count: int
    file_type: str | None  # free-form string; separate from ``type`` below
    for_packages: list
    holders: list[Holder]
    is_archive: bool
    is_binary: bool
    is_media: bool
    is_script: bool
    is_source: bool
    is_text: bool
    license_clues: list
    license_detections: list[FileBasedLicenseDetection]
    md5: str | None
    mime_type: str | None
    name: str
    package_data: list
    path: str
    percentage_of_license_text: float
    programming_language: str | None
    scan_errors: list
    sha1: str | None
    sha256: str | None
    size: int
    size_count: int
    type: FileType  # restricted to the values of ``FileType``
    urls: list[Url]
|
||
|
||
class ScanCodeData(BaseModel):
    """Top-level model of a ScanCode JSON document.

    ``dependencies`` and ``packages`` are bare ``list`` annotations, so
    their element types are not constrained.
    """

    dependencies: list
    files: list[File]
    license_detections: list[GlobalLicenseDetection]
    headers: list[Header]
    packages: list
Oops, something went wrong.