diff --git a/CHANGELOG.md b/CHANGELOG.md index 7efcb6bb2..adc9cbcc1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -60,7 +60,8 @@ The versions follow [semantic versioning](https://semver.org). - Sanitize xargs input in scripts documentation - License identifiers in comments with symmetrical ASCII art frames are now properly detected (#560) - +- Fixed an error where copyright statements contained within a multi-line + comment style on a single line could not be parsed (#593). - In PHP files, add header after ` bool: @classmethod def can_handle_multi(cls) -> bool: """Whether the :class:`CommentStyle` can handle multi-line comments.""" - return all((cls.MULTI_LINE[0], cls.MULTI_LINE[2])) + return all((cls.MULTI_LINE.start, cls.MULTI_LINE.end)) @classmethod def create_comment(cls, text: str, force_multi: bool = False) -> str: @@ -90,19 +100,19 @@ def _create_comment_multi(cls, text: str) -> str: if not cls.can_handle_multi(): raise CommentCreateError(f"{cls} cannot create multi-line comments") result = [] - result.append(cls.MULTI_LINE[0]) + result.append(cls.MULTI_LINE.start) for line in text.split("\n"): - if cls.MULTI_LINE[2] in text: + if cls.MULTI_LINE.end in text: raise CommentCreateError( f"'{line}' contains a premature comment delimiter" ) line_result = "" - if cls.MULTI_LINE[1]: - line_result += cls.INDENT_BEFORE_MIDDLE + cls.MULTI_LINE[1] + if cls.MULTI_LINE.middle: + line_result += cls.INDENT_BEFORE_MIDDLE + cls.MULTI_LINE.middle if line: line_result += cls.INDENT_AFTER_MIDDLE + line result.append(line_result) - result.append(cls.INDENT_BEFORE_END + cls.MULTI_LINE[2]) + result.append(cls.INDENT_BEFORE_END + cls.MULTI_LINE.end) return "\n".join(result) @classmethod @@ -139,9 +149,9 @@ def _parse_comment_single(cls, text: str) -> str: @classmethod def _remove_middle_marker(cls, line: str) -> str: - if cls.MULTI_LINE[1]: + if cls.MULTI_LINE.middle: possible_line = line.lstrip() - prefix = cls.MULTI_LINE[1] + prefix = cls.MULTI_LINE.middle if possible_line.startswith(prefix): line = possible_line.lstrip(prefix) # Note to future self: line.removeprefix would be preferable @@ -174,11 +184,11 @@ def _parse_comment_multi(cls, text: str) -> str: last = None # Set this later. last_is_first = True - if not first.startswith(cls.MULTI_LINE[0]): + if not first.startswith(cls.MULTI_LINE.start): raise CommentParseError( f"'{first}' does not start with a comment marker" ) - first = first.lstrip(cls.MULTI_LINE[0]) + first = first.lstrip(cls.MULTI_LINE.start) first = first.lstrip() for line in lines: @@ -188,11 +198,11 @@ def _parse_comment_multi(cls, text: str) -> str: if last_is_first: last = first first = "" - if not last.endswith(cls.MULTI_LINE[2]): + if not last.endswith(cls.MULTI_LINE.end): raise CommentParseError( f"'{last}' does not end with a comment delimiter" ) - last = last.rstrip(cls.MULTI_LINE[2]) + last = last.rstrip(cls.MULTI_LINE.end) last = last.rstrip() last = cls._remove_middle_marker(last) @@ -223,11 +233,11 @@ def comment_at_first_character(cls, text: str) -> str: break end = i return "\n".join(lines[0 : end + 1]) - if cls.can_handle_multi() and text.startswith(cls.MULTI_LINE[0]): + if cls.can_handle_multi() and text.startswith(cls.MULTI_LINE.start): end = 0 for i, line in enumerate(lines): end = i - if line.endswith(cls.MULTI_LINE[2]): + if line.endswith(cls.MULTI_LINE.end): break else: raise CommentParseError("Comment block never delimits") @@ -245,7 +255,7 @@ class AppleScriptCommentStyle(CommentStyle): SINGLE_LINE = "--" INDENT_AFTER_SINGLE = " " - MULTI_LINE = ("(*", "", "*)") + MULTI_LINE = MultiLineSegments("(*", "", "*)") class AspxCommentStyle(CommentStyle): @@ -253,7 +263,7 @@ class AspxCommentStyle(CommentStyle): _shorthand = "aspx" - MULTI_LINE = ("<%--", "", "--%>") + MULTI_LINE = MultiLineSegments("<%--", "", "--%>") class BatchFileCommentStyle(CommentStyle): @@ -270,7 +280,7 @@ class BibTexCommentStyle(CommentStyle): _shorthand = "bibtex" - MULTI_LINE = ("@Comment{", "", "}") + MULTI_LINE = MultiLineSegments("@Comment{", "", "}") class CCommentStyle(CommentStyle): @@ -280,7 +290,7 @@ class CCommentStyle(CommentStyle): SINGLE_LINE = "//" INDENT_AFTER_SINGLE = " " - MULTI_LINE = ("/*", "*", "*/") + MULTI_LINE = MultiLineSegments("/*", "*", "*/") INDENT_BEFORE_MIDDLE = " " INDENT_AFTER_MIDDLE = " " INDENT_BEFORE_END = " " @@ -291,7 +301,7 @@ class CssCommentStyle(CommentStyle): _shorthand = "css" - MULTI_LINE = ("/*", "*", "*/") + MULTI_LINE = MultiLineSegments("/*", "*", "*/") INDENT_BEFORE_MIDDLE = " " INDENT_AFTER_MIDDLE = " " INDENT_BEFORE_END = " " @@ -327,7 +337,7 @@ class FtlCommentStyle(CommentStyle): _shorthand = "ftl" - MULTI_LINE = ("<#--", "", "-->") + MULTI_LINE = MultiLineSegments("<#--", "", "-->") class HandlebarsCommentStyle(CommentStyle): @@ -335,7 +345,7 @@ class HandlebarsCommentStyle(CommentStyle): _shorthand = "handlebars" - MULTI_LINE = ("{{!--", "", "--}}") + MULTI_LINE = MultiLineSegments("{{!--", "", "--}}") class HaskellCommentStyle(CommentStyle): @@ -352,7 +362,7 @@ class HtmlCommentStyle(CommentStyle): _shorthand = "html" - MULTI_LINE = ("") + MULTI_LINE = MultiLineSegments("") class JinjaCommentStyle(CommentStyle): @@ -360,7 +370,7 @@ class JinjaCommentStyle(CommentStyle): _shorthand = "jinja" - MULTI_LINE = ("{#", "", "#}") + MULTI_LINE = MultiLineSegments("{#", "", "#}") class LispCommentStyle(CommentStyle): @@ -386,7 +396,7 @@ class MlCommentStyle(CommentStyle): _shorthand = "ml" - MULTI_LINE = ("(*", "*", "*)") + MULTI_LINE = MultiLineSegments("(*", "*", "*)") INDENT_BEFORE_MIDDLE = " " INDENT_AFTER_MIDDLE = " " INDENT_BEFORE_END = " " @@ -399,7 +409,7 @@ class PlantUmlCommentStyle(CommentStyle): SINGLE_LINE = "'" INDENT_AFTER_SINGLE = " " - MULTI_LINE = ("/'", "'", "'/") + MULTI_LINE = MultiLineSegments("/'", "'", "'/") INDENT_BEFORE_MIDDLE = " " INDENT_AFTER_MIDDLE = " " INDENT_BEFORE_END = " " diff --git a/src/reuse/_util.py b/src/reuse/_util.py index 2ba484811..79f8811d8 100644 --- a/src/reuse/_util.py +++ b/src/reuse/_util.py @@ -46,10 +46,10 @@ "".join( { r"(?:{})*".format( # pylint: disable=consider-using-f-string - re.escape(style.MULTI_LINE[2]) + re.escape(style.MULTI_LINE.end) ) for style in _all_style_classes() - if style.MULTI_LINE[2] + if style.MULTI_LINE.end } ) ) @@ -60,17 +60,17 @@ re.compile( r"(?P(?PSPDX-FileCopyrightText:)\s+" r"((?P\d{4} - \d{4}|\d{4}),?\s+)?" - r"(?P.*)?)" + _END_PATTERN + r"(?P.*?))" + _END_PATTERN ), re.compile( r"(?P(?PCopyright(\s?\([cC]\))?)\s+" r"((?P\d{4} - \d{4}|\d{4}),?\s+)?" - r"(?P.*)?)" + _END_PATTERN + r"(?P.*?))" + _END_PATTERN ), re.compile( r"(?P(?P©)\s+" r"((?P\d{4} - \d{4}|\d{4}),?\s+)?" - r"(?P.*)?)" + _END_PATTERN + r"(?P.*?))" + _END_PATTERN ), ] @@ -282,7 +282,7 @@ def extract_spdx_info(text: str) -> SpdxInfo: for pattern in _COPYRIGHT_PATTERNS: match = pattern.search(line) if match is not None: - copyright_matches.add(match.groupdict()["copyright"]) + copyright_matches.add(match.groupdict()["copyright"].strip()) break return SpdxInfo(expressions, copyright_matches) diff --git a/tests/test_comment.py b/tests/test_comment.py index 57f0ca892..c62b391d8 100644 --- a/tests/test_comment.py +++ b/tests/test_comment.py @@ -56,9 +56,9 @@ def test_create_comment_generic_multi(MultiStyle): text = "Hello" expected = cleandoc( f""" - {MultiStyle.MULTI_LINE[0]} - {MultiStyle.INDENT_BEFORE_MIDDLE}{MultiStyle.MULTI_LINE[1]}{MultiStyle.INDENT_AFTER_MIDDLE}Hello - {MultiStyle.INDENT_BEFORE_END}{MultiStyle.MULTI_LINE[2]} + {MultiStyle.MULTI_LINE.start} + {MultiStyle.INDENT_BEFORE_MIDDLE}{MultiStyle.MULTI_LINE.middle}{MultiStyle.INDENT_AFTER_MIDDLE}Hello + {MultiStyle.INDENT_BEFORE_END}{MultiStyle.MULTI_LINE.end} """ ) @@ -78,9 +78,23 @@ def test_parse_comment_generic_multi(MultiStyle): # pylint: disable=line-too-long text = cleandoc( f""" - {MultiStyle.MULTI_LINE[0]} - {MultiStyle.INDENT_BEFORE_MIDDLE}{MultiStyle.MULTI_LINE[1]}{MultiStyle.INDENT_AFTER_MIDDLE}Hello - {MultiStyle.INDENT_BEFORE_END}{MultiStyle.MULTI_LINE[2]} + {MultiStyle.MULTI_LINE.start} + {MultiStyle.INDENT_BEFORE_MIDDLE}{MultiStyle.MULTI_LINE.middle}{MultiStyle.INDENT_AFTER_MIDDLE}Hello + {MultiStyle.INDENT_BEFORE_END}{MultiStyle.MULTI_LINE.end} + """ + ) + expected = "Hello" + + assert MultiStyle.parse_comment(text) == expected + + +def test_parse_comment_sameline_multi(MultiStyle): + """If a multi-line comment style is on a single line, it should still be + parsed. + """ + text = cleandoc( + f""" + {MultiStyle.MULTI_LINE.start} Hello {MultiStyle.MULTI_LINE.end} """ ) expected = "Hello" diff --git a/tests/test_util.py b/tests/test_util.py index 7c216df19..f084d7e9f 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -176,6 +176,16 @@ def test_extract_with_ignore_block(): assert len(result.spdx_expressions) == 1 +def test_extract_sameline_multiline(): + """When a copyright line is in a multi-line style comment on a single line, + do not include the comment end pattern as part of the copyright. + """ + text = "" + result = _util.extract_spdx_info(text) + assert len(result.copyright_lines) == 1 + assert result.copyright_lines == {"SPDX-FileCopyrightText: Jane Doe"} + + def test_filter_ignore_block_with_comment_style(): """Test that the ignore block is properly removed if start and end markers are in comment style.