Skip to content

Commit

Permalink
Merge pull request #87 from akaihola/newline-splitting
Browse files Browse the repository at this point in the history
Split newlines only at Python universal newlines (LF, CRLF, CR)
akaihola authored Jan 7, 2025

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
2 parents 5d3dbe4 + 3035dcc commit b859b19
Showing 3 changed files with 25 additions and 1 deletion.
2 changes: 2 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
@@ -15,6 +15,8 @@ Removed

Fixed
-----
- Only split input files at Python's universal newlines (LF, CRLF, CR), not on more
exotic newline sequences. This fixes some edge cases in Darker.


2.1.0_ - 2024-11-19
6 changes: 6 additions & 0 deletions src/darkgraylib/tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -138,6 +138,12 @@ def test_textdocument_encoded_string(encoding, newline, expect):
dict(
doc=TextDocument(string="zéro\r\nun\r\n", newline="\r\n"), expect=("zéro", "un")
),
dict(
doc=TextDocument(
string="# coding: iso-8859-5\n# б\x85б\x86\n", encoding="iso-8859-5"
),
expect=("# coding: iso-8859-5", "# б\x85б\x86"),
),
)
def test_textdocument_lines(doc, expect):
"""TextDocument.lines is correct after parsing a string with different newlines"""
18 changes: 17 additions & 1 deletion src/darkgraylib/utils.py
Original file line number Diff line number Diff line change
@@ -22,6 +22,22 @@ def detect_newline(string: str) -> str:
return "\n"


def normalize_newlines(string: str) -> str:
    """Normalize universal newlines (LF, CRLF, CR) in a string to LF.

    Only the three Python universal newline sequences are translated. Other
    characters which ``str.splitlines`` treats as line boundaries (e.g. ``\\x85``)
    are left untouched.

    :param string: The text whose newlines should be normalized
    :return: The text with every CRLF and lone CR replaced by LF

    """
    # ``final=True`` is essential: without it the decoder assumes more input may
    # follow, holds back a trailing CR as a potential start of CRLF, and the CR
    # is silently lost from the result.
    return io.IncrementalNewlineDecoder(None, True).decode(string, final=True)


def splitlines(string: str) -> list[str]:
    """Split a string into lines at universal newlines (LF, CRLF, CR).

    Unlike ``str.splitlines``, more exotic line boundary characters are not
    treated as newlines and remain part of the line they appear in.

    :param string: The text to split
    :return: The lines of the text without their newline terminators. An empty
             string yields an empty list.

    """
    if not string:
        return []
    normalized = normalize_newlines(string)
    # Drop only the single newline terminating the last line. Stripping all
    # trailing newlines would silently discard blank lines at the end of the text.
    if normalized.endswith("\n"):
        normalized = normalized[:-1]
    return normalized.split("\n")


class TextDocument:
"""Store & handle a multi-line text document, either as a string or list of lines"""

@@ -65,7 +81,7 @@ def encoded_string(self) -> bytes:
def lines(self) -> TextLines:
"""Return the document as a list of lines converting and caching if necessary"""
if self._lines is None:
self._lines = tuple((self._string or "").splitlines())
self._lines = tuple(splitlines(self._string or ""))
return self._lines

@property

0 comments on commit b859b19

Please sign in to comment.