Skip to content

Commit

Permalink
Merge pull request #593 from carmenbianca/fix-sameline-multiline
Browse files Browse the repository at this point in the history
Fix sameline multiline
  • Loading branch information
carmenbianca authored Oct 11, 2022
2 parents b99581b + c4b2ba3 commit 3450858
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 40 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ The versions follow [semantic versioning](https://semver.org).
- Sanitize xargs input in scripts documentation
- License identifiers in comments with symmetrical ASCII art frames are now
properly detected (#560)

- Fixed an error where a copyright statement could not be parsed when it was
written in a multi-line comment style that opens and closes on the same line,
e.g. `<!-- SPDX-FileCopyrightText: Jane Doe -->` (#593).
- In PHP files, add header after `<?php` (#543).

### Security
Expand Down
64 changes: 37 additions & 27 deletions src/reuse/_comment.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import logging
import operator
from textwrap import dedent
from typing import List
from typing import List, NamedTuple

_LOGGER = logging.getLogger(__name__)

Expand All @@ -30,14 +30,24 @@ class CommentCreateError(Exception):
"""An error occurred during the creation of a comment."""


class MultiLineSegments(NamedTuple):
    """Components that make up a multi-line comment style, e.g. '/*', '*', and
    '*/'.
    """

    # Marker that opens the comment block, e.g. '/*'.
    start: str
    # Marker prefixed to each line between the opening and closing markers,
    # e.g. '*'. Several styles leave this as the empty string.
    middle: str
    # Marker that closes the comment block, e.g. '*/'.
    end: str


class CommentStyle:
"""Base class for comment style."""

SINGLE_LINE = ""
INDENT_AFTER_SINGLE = ""
# (start, middle, end)
# e.g., ("/*", "*", "*/")
MULTI_LINE = ("", "", "")
MULTI_LINE = MultiLineSegments("", "", "")
INDENT_BEFORE_MIDDLE = ""
INDENT_AFTER_MIDDLE = ""
INDENT_BEFORE_END = ""
Expand All @@ -50,7 +60,7 @@ def can_handle_single(cls) -> bool:
@classmethod
def can_handle_multi(cls) -> bool:
"""Whether the :class:`CommentStyle` can handle multi-line comments."""
return all((cls.MULTI_LINE[0], cls.MULTI_LINE[2]))
return all((cls.MULTI_LINE.start, cls.MULTI_LINE.end))

@classmethod
def create_comment(cls, text: str, force_multi: bool = False) -> str:
Expand Down Expand Up @@ -90,19 +100,19 @@ def _create_comment_multi(cls, text: str) -> str:
if not cls.can_handle_multi():
raise CommentCreateError(f"{cls} cannot create multi-line comments")
result = []
result.append(cls.MULTI_LINE[0])
result.append(cls.MULTI_LINE.start)
for line in text.split("\n"):
if cls.MULTI_LINE[2] in text:
if cls.MULTI_LINE.end in text:
raise CommentCreateError(
f"'{line}' contains a premature comment delimiter"
)
line_result = ""
if cls.MULTI_LINE[1]:
line_result += cls.INDENT_BEFORE_MIDDLE + cls.MULTI_LINE[1]
if cls.MULTI_LINE.middle:
line_result += cls.INDENT_BEFORE_MIDDLE + cls.MULTI_LINE.middle
if line:
line_result += cls.INDENT_AFTER_MIDDLE + line
result.append(line_result)
result.append(cls.INDENT_BEFORE_END + cls.MULTI_LINE[2])
result.append(cls.INDENT_BEFORE_END + cls.MULTI_LINE.end)
return "\n".join(result)

@classmethod
Expand Down Expand Up @@ -139,9 +149,9 @@ def _parse_comment_single(cls, text: str) -> str:

@classmethod
def _remove_middle_marker(cls, line: str) -> str:
if cls.MULTI_LINE[1]:
if cls.MULTI_LINE.middle:
possible_line = line.lstrip()
prefix = cls.MULTI_LINE[1]
prefix = cls.MULTI_LINE.middle
if possible_line.startswith(prefix):
line = possible_line.lstrip(prefix)
# Note to future self: line.removeprefix would be preferable
Expand Down Expand Up @@ -174,11 +184,11 @@ def _parse_comment_multi(cls, text: str) -> str:
last = None # Set this later.
last_is_first = True

if not first.startswith(cls.MULTI_LINE[0]):
if not first.startswith(cls.MULTI_LINE.start):
raise CommentParseError(
f"'{first}' does not start with a comment marker"
)
first = first.lstrip(cls.MULTI_LINE[0])
first = first.lstrip(cls.MULTI_LINE.start)
first = first.lstrip()

for line in lines:
Expand All @@ -188,11 +198,11 @@ def _parse_comment_multi(cls, text: str) -> str:
if last_is_first:
last = first
first = ""
if not last.endswith(cls.MULTI_LINE[2]):
if not last.endswith(cls.MULTI_LINE.end):
raise CommentParseError(
f"'{last}' does not end with a comment delimiter"
)
last = last.rstrip(cls.MULTI_LINE[2])
last = last.rstrip(cls.MULTI_LINE.end)
last = last.rstrip()
last = cls._remove_middle_marker(last)

Expand Down Expand Up @@ -223,11 +233,11 @@ def comment_at_first_character(cls, text: str) -> str:
break
end = i
return "\n".join(lines[0 : end + 1])
if cls.can_handle_multi() and text.startswith(cls.MULTI_LINE[0]):
if cls.can_handle_multi() and text.startswith(cls.MULTI_LINE.start):
end = 0
for i, line in enumerate(lines):
end = i
if line.endswith(cls.MULTI_LINE[2]):
if line.endswith(cls.MULTI_LINE.end):
break
else:
raise CommentParseError("Comment block never delimits")
Expand All @@ -245,15 +255,15 @@ class AppleScriptCommentStyle(CommentStyle):

SINGLE_LINE = "--"
INDENT_AFTER_SINGLE = " "
MULTI_LINE = ("(*", "", "*)")
MULTI_LINE = MultiLineSegments("(*", "", "*)")


class AspxCommentStyle(CommentStyle):
"""ASPX comment style."""

_shorthand = "aspx"

MULTI_LINE = ("<%--", "", "--%>")
MULTI_LINE = MultiLineSegments("<%--", "", "--%>")


class BatchFileCommentStyle(CommentStyle):
Expand All @@ -270,7 +280,7 @@ class BibTexCommentStyle(CommentStyle):

_shorthand = "bibtex"

MULTI_LINE = ("@Comment{", "", "}")
MULTI_LINE = MultiLineSegments("@Comment{", "", "}")


class CCommentStyle(CommentStyle):
Expand All @@ -280,7 +290,7 @@ class CCommentStyle(CommentStyle):

SINGLE_LINE = "//"
INDENT_AFTER_SINGLE = " "
MULTI_LINE = ("/*", "*", "*/")
MULTI_LINE = MultiLineSegments("/*", "*", "*/")
INDENT_BEFORE_MIDDLE = " "
INDENT_AFTER_MIDDLE = " "
INDENT_BEFORE_END = " "
Expand All @@ -291,7 +301,7 @@ class CssCommentStyle(CommentStyle):

_shorthand = "css"

MULTI_LINE = ("/*", "*", "*/")
MULTI_LINE = MultiLineSegments("/*", "*", "*/")
INDENT_BEFORE_MIDDLE = " "
INDENT_AFTER_MIDDLE = " "
INDENT_BEFORE_END = " "
Expand Down Expand Up @@ -327,15 +337,15 @@ class FtlCommentStyle(CommentStyle):

_shorthand = "ftl"

MULTI_LINE = ("<#--", "", "-->")
MULTI_LINE = MultiLineSegments("<#--", "", "-->")


class HandlebarsCommentStyle(CommentStyle):
"""Handlebars comment style."""

_shorthand = "handlebars"

MULTI_LINE = ("{{!--", "", "--}}")
MULTI_LINE = MultiLineSegments("{{!--", "", "--}}")


class HaskellCommentStyle(CommentStyle):
Expand All @@ -352,15 +362,15 @@ class HtmlCommentStyle(CommentStyle):

_shorthand = "html"

MULTI_LINE = ("<!--", "", "-->")
MULTI_LINE = MultiLineSegments("<!--", "", "-->")


class JinjaCommentStyle(CommentStyle):
"""Jinja2 comment style."""

_shorthand = "jinja"

MULTI_LINE = ("{#", "", "#}")
MULTI_LINE = MultiLineSegments("{#", "", "#}")


class LispCommentStyle(CommentStyle):
Expand All @@ -386,7 +396,7 @@ class MlCommentStyle(CommentStyle):

_shorthand = "ml"

MULTI_LINE = ("(*", "*", "*)")
MULTI_LINE = MultiLineSegments("(*", "*", "*)")
INDENT_BEFORE_MIDDLE = " "
INDENT_AFTER_MIDDLE = " "
INDENT_BEFORE_END = " "
Expand All @@ -399,7 +409,7 @@ class PlantUmlCommentStyle(CommentStyle):

SINGLE_LINE = "'"
INDENT_AFTER_SINGLE = " "
MULTI_LINE = ("/'", "'", "'/")
MULTI_LINE = MultiLineSegments("/'", "'", "'/")
INDENT_BEFORE_MIDDLE = " "
INDENT_AFTER_MIDDLE = " "
INDENT_BEFORE_END = " "
Expand Down
12 changes: 6 additions & 6 deletions src/reuse/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,10 @@
"".join(
{
r"(?:{})*".format( # pylint: disable=consider-using-f-string
re.escape(style.MULTI_LINE[2])
re.escape(style.MULTI_LINE.end)
)
for style in _all_style_classes()
if style.MULTI_LINE[2]
if style.MULTI_LINE.end
}
)
)
Expand All @@ -60,17 +60,17 @@
re.compile(
r"(?P<copyright>(?P<prefix>SPDX-FileCopyrightText:)\s+"
r"((?P<year>\d{4} - \d{4}|\d{4}),?\s+)?"
r"(?P<statement>.*)?)" + _END_PATTERN
r"(?P<statement>.*?))" + _END_PATTERN
),
re.compile(
r"(?P<copyright>(?P<prefix>Copyright(\s?\([cC]\))?)\s+"
r"((?P<year>\d{4} - \d{4}|\d{4}),?\s+)?"
r"(?P<statement>.*)?)" + _END_PATTERN
r"(?P<statement>.*?))" + _END_PATTERN
),
re.compile(
r"(?P<copyright>(?P<prefix>©)\s+"
r"((?P<year>\d{4} - \d{4}|\d{4}),?\s+)?"
r"(?P<statement>.*)?)" + _END_PATTERN
r"(?P<statement>.*?))" + _END_PATTERN
),
]

Expand Down Expand Up @@ -282,7 +282,7 @@ def extract_spdx_info(text: str) -> SpdxInfo:
for pattern in _COPYRIGHT_PATTERNS:
match = pattern.search(line)
if match is not None:
copyright_matches.add(match.groupdict()["copyright"])
copyright_matches.add(match.groupdict()["copyright"].strip())
break

return SpdxInfo(expressions, copyright_matches)
Expand Down
26 changes: 20 additions & 6 deletions tests/test_comment.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,9 @@ def test_create_comment_generic_multi(MultiStyle):
text = "Hello"
expected = cleandoc(
f"""
{MultiStyle.MULTI_LINE[0]}
{MultiStyle.INDENT_BEFORE_MIDDLE}{MultiStyle.MULTI_LINE[1]}{MultiStyle.INDENT_AFTER_MIDDLE}Hello
{MultiStyle.INDENT_BEFORE_END}{MultiStyle.MULTI_LINE[2]}
{MultiStyle.MULTI_LINE.start}
{MultiStyle.INDENT_BEFORE_MIDDLE}{MultiStyle.MULTI_LINE.middle}{MultiStyle.INDENT_AFTER_MIDDLE}Hello
{MultiStyle.INDENT_BEFORE_END}{MultiStyle.MULTI_LINE.end}
"""
)

Expand All @@ -78,9 +78,23 @@ def test_parse_comment_generic_multi(MultiStyle):
# pylint: disable=line-too-long
text = cleandoc(
f"""
{MultiStyle.MULTI_LINE[0]}
{MultiStyle.INDENT_BEFORE_MIDDLE}{MultiStyle.MULTI_LINE[1]}{MultiStyle.INDENT_AFTER_MIDDLE}Hello
{MultiStyle.INDENT_BEFORE_END}{MultiStyle.MULTI_LINE[2]}
{MultiStyle.MULTI_LINE.start}
{MultiStyle.INDENT_BEFORE_MIDDLE}{MultiStyle.MULTI_LINE.middle}{MultiStyle.INDENT_AFTER_MIDDLE}Hello
{MultiStyle.INDENT_BEFORE_END}{MultiStyle.MULTI_LINE.end}
"""
)
expected = "Hello"

assert MultiStyle.parse_comment(text) == expected


def test_parse_comment_sameline_multi(MultiStyle):
"""If a multi-line comment style is on a single line, it should still be
parsed.
"""
text = cleandoc(
f"""
{MultiStyle.MULTI_LINE.start} Hello {MultiStyle.MULTI_LINE.end}
"""
)
expected = "Hello"
Expand Down
10 changes: 10 additions & 0 deletions tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,16 @@ def test_extract_with_ignore_block():
assert len(result.spdx_expressions) == 1


def test_extract_sameline_multiline():
    """A copyright statement wrapped in a multi-line comment style that sits
    entirely on one line must be extracted without the comment's closing
    marker attached to it.
    """
    spdx_info = _util.extract_spdx_info(
        "<!-- SPDX-FileCopyrightText: Jane Doe -->"
    )
    # Exactly one statement is expected, with the trailing '-->' removed.
    assert len(spdx_info.copyright_lines) == 1
    assert spdx_info.copyright_lines == {"SPDX-FileCopyrightText: Jane Doe"}


def test_filter_ignore_block_with_comment_style():
"""Test that the ignore block is properly removed if start and end markers
are in comment style.
Expand Down

0 comments on commit 3450858

Please sign in to comment.