From 1fa3f719c8123b54dacc218678bea4c84fef274a Mon Sep 17 00:00:00 2001
From: Souta Kawahara
Date: Wed, 21 Aug 2024 20:22:18 +0900
Subject: [PATCH] Fix for SPDX validation failure due to invalid CPE strings

Generating an SPDX SBOM failed for packages with '+' in their names
because the resulting CPE string was invalid.

Escape '+' and the other special characters in the name, version and
release parts of the CPE. As spdx-tools does not support percent
encoding, backslash escaping is used instead. Characters that cannot be
escaped are removed from the CPE part.

This patch fixes:
- https://github.com/AlmaLinux/alma-sbom/issues/43
---
 alma_sbom.py      |  6 ++++--
 libsbom/common.py | 25 +++++++++++++++++++++++++
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/alma_sbom.py b/alma_sbom.py
index 7e3ec27..5d7ba33 100755
--- a/alma_sbom.py
+++ b/alma_sbom.py
@@ -14,6 +14,7 @@
 
 from libsbom import cyclonedx as alma_cyclonedx
 from libsbom import spdx as alma_spdx
+from libsbom import common
 
 ALBS_URL = 'https://build.almalinux.org'
 IS_SIGNED = 3
@@ -164,8 +165,9 @@ def _generate_cpe(package_nevra: PackageNevra) -> str:
     cpe_epoch_part += '\\:' if cpe_epoch_part else ""
     cpe = (
         f'cpe:{cpe_version}:a:almalinux:'
-        f'{package_nevra.name}:{cpe_epoch_part}'
-        f'{package_nevra.version}-{package_nevra.release}:*:*:*:*:*:*:*'
+        f'{common.escape_encode_cpe_part(package_nevra.name)}:{cpe_epoch_part}'
+        f'{common.escape_encode_cpe_part(package_nevra.version)}-'
+        f'{common.escape_encode_cpe_part(package_nevra.release)}:*:*:*:*:*:*:*'
     )
     return cpe
 
diff --git a/libsbom/common.py b/libsbom/common.py
index 74d03fa..8e043d7 100644
--- a/libsbom/common.py
+++ b/libsbom/common.py
@@ -1,4 +1,5 @@
 import typing
+import re
 
 def replace_patterns(input_str: str, patterns: typing.Dict[str, str]) -> str:
     """Convenience function to perform multiple string replacements."""
@@ -34,6 +35,30 @@ def normalize_epoch_in_purl(purl: str) -> str:
     return replace_patterns(input_str=purl, patterns=patterns)
 
 
+def escape_encode_cpe_part(cpe: str) -> str:
+    """Backslash-escape one CPE component for the spdx-tools validation.
+
+    Letters, digits, ``-``, ``.`` and ``_`` are kept as they are and ASCII
+    punctuation is prefixed with a backslash.  Any other character (for
+    example whitespace or non-ASCII text) is removed from the result.
+
+    :param cpe: a single CPE component such as a package name or version,
+        not a complete CPE string
+    :return: the component with its special characters escaped
+    """
+    allowed_chars = r'a-zA-Z0-9\-\._'
+    escape_chars = '\\*?!"#$%&\'()+,/:;<=>@[]^`{|}~'
+
+    def encode_char(match: 're.Match') -> str:
+        char = match.group(0)
+        if char in escape_chars:
+            return '\\' + char
+        # Not escapable: drop it explicitly instead of returning None and
+        # relying on how re.sub() happens to treat that value.
+        return ''
+
+    return re.sub(f'[^{allowed_chars}]', encode_char, cpe)
+
 
 def normalize_epoch_in_cpe(cpe: str) -> str:
     """Replace unset epochs in CPEs with 0."""