This repository has been archived by the owner on Apr 4, 2024. It is now read-only.
forked from diffplug/selfie
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
branch conflicts resolved can ignore
- Loading branch information
Showing
3 changed files
with
184 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
from typing import List | ||
|
||
|
||
class PerCharacterEscaper:
    """Escape and unescape individual characters of a string.

    An escaper is described by one escape character and two parallel lists:
    the code points that must be escaped, and the code points that stand in
    for them after the escape character. Use ``self_escape`` or
    ``specified_escape`` to build an instance from a policy string.

    ``escape`` and ``unescape`` return the very same string object they were
    given when nothing has to be rewritten.
    """

    def __init__(
        self,
        escape_code_point: int,
        escaped_code_points: List[int],
        escaped_by_code_points: List[int],
    ):
        """Create an escaper.

        Args:
            escape_code_point: Code point of the escape character.
            escaped_code_points: Code points that ``escape`` rewrites.
            escaped_by_code_points: For each entry of ``escaped_code_points``
                (same position), the code point written after the escape
                character.

        Raises:
            ValueError: If the two lists differ in length.
        """
        if len(escaped_code_points) != len(escaped_by_code_points):
            raise ValueError(
                "escaped_code_points and escaped_by_code_points must have "
                "the same length."
            )
        self.__escape_code_point = escape_code_point
        # Lookup tables replace repeated list scans. setdefault keeps the
        # first pairing when a code point is listed more than once, which is
        # the pairing list.index() would have selected.
        self.__escape_map = {}
        self.__unescape_map = {}
        for plain, stand_in in zip(escaped_code_points, escaped_by_code_points):
            self.__escape_map.setdefault(plain, stand_in)
            self.__unescape_map.setdefault(stand_in, plain)

    def __first_offset_needing_escape(self, input_string: str) -> int:
        """Return the index of the first character ``escape`` must inspect.

        That is the first character which is either the escape character or
        one of the escaped characters. Returns -1 when there is none.
        """
        for offset, char in enumerate(input_string):
            code_point = ord(char)
            if (
                code_point == self.__escape_code_point
                or code_point in self.__escape_map
            ):
                return offset
        return -1

    def escape(self, input_string: str) -> str:
        """Return ``input_string`` with every escaped character rewritten.

        Each escaped character becomes the escape character followed by its
        stand-in. If no character needs attention, ``input_string`` itself is
        returned.
        """
        first = self.__first_offset_needing_escape(input_string)
        if first == -1:
            return input_string

        escape_char = chr(self.__escape_code_point)
        # Everything before `first` is known to be clean; copy it wholesale.
        result = [input_string[:first]]
        for char in input_string[first:]:
            stand_in = self.__escape_map.get(ord(char))
            if stand_in is None:
                result.append(char)
            else:
                result.append(escape_char)
                result.append(chr(stand_in))
        return "".join(result)

    def unescape(self, input_string: str) -> str:
        """Reverse ``escape``.

        The escape character followed by a known stand-in yields the
        character that stand-in represents; the escape character followed by
        any other character yields that character unchanged. If
        ``input_string`` contains no escape character, ``input_string``
        itself is returned.

        Raises:
            ValueError: If an escape character has nothing after it, i.e. the
                string ends in the middle of an escape sequence.
        """
        escape_char = chr(self.__escape_code_point)
        # Only the escape character matters when unescaping; characters that
        # merely appear in the escaped set are already in their final form.
        first = input_string.find(escape_char)
        if first == -1:
            return input_string

        result = [input_string[:first]]
        remaining = iter(input_string[first:])
        for char in remaining:
            if char != escape_char:
                result.append(char)
                continue
            # Consume the character that follows the escape character.
            following = next(remaining, None)
            if following is None:
                # Detected during the scan so that inputs such as "```"
                # (a complete pair followed by a lone escape) are rejected.
                raise ValueError(
                    "Escape character '{}' can't be the last character in a string.".format(
                        escape_char
                    )
                )
            code_point = ord(following)
            result.append(chr(self.__unescape_map.get(code_point, code_point)))
        return "".join(result)

    @classmethod
    def self_escape(cls, escape_policy: str) -> "PerCharacterEscaper":
        """Build an escaper in which every character escapes to itself.

        The first character of ``escape_policy`` is the escape character;
        every character of the policy (the first one included) is written as
        the escape character followed by that same character.

        Raises:
            IndexError: If ``escape_policy`` is empty.
        """
        code_points = [ord(c) for c in escape_policy]
        escape_code_point = code_points[0]
        return cls(escape_code_point, code_points, code_points)

    @classmethod
    def specified_escape(cls, escape_policy: str) -> "PerCharacterEscaper":
        """Build an escaper from (character, stand-in) pairs.

        ``escape_policy`` is read two characters at a time: the first of each
        pair is escaped, the second is what follows the escape character in
        the escaped form. The very first character of the policy is the
        escape character.

        Raises:
            ValueError: If ``escape_policy`` has an odd number of characters.
            IndexError: If ``escape_policy`` is empty.
        """
        code_points = [ord(c) for c in escape_policy]
        if len(code_points) % 2 != 0:
            raise ValueError(
                "Escape policy string must have an even number of characters."
            )
        escape_code_point = code_points[0]
        escaped_code_points = code_points[0::2]
        escaped_by_code_points = code_points[1::2]
        return cls(escape_code_point, escaped_code_points, escaped_by_code_points)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
from .LineReader import LineReader as LineReader | ||
from .Slice import Slice as Slice | ||
from .SourceFile import SourceFile as SourceFile | ||
from .PerCharacterEscaper import PerCharacterEscaper as PerCharacterEscaper |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
import pytest | ||
|
||
from selfie_lib import PerCharacterEscaper | ||
|
||
|
||
class TestPerCharacterEscaper:
    """Behavioural tests for PerCharacterEscaper."""

    def test_performance_optimization_self(self):
        """Self-escaping policy: identity fast path plus basic conversions."""
        escaper = PerCharacterEscaper.self_escape("`123")
        abc = "abc"
        # 'is' checks object identity: untouched input must come back as the
        # same object, not merely an equal copy.
        assert (
            escaper.escape(abc) is abc
        ), "Escape should return the original object when no change is made"
        assert (
            escaper.unescape(abc) is abc
        ), "Unescape should return the original object when no change is made"

        # '==' checks value equality.
        assert (
            escaper.escape("1") == "`1"
        ), "Escaping '1' should prepend the escape character"
        assert (
            escaper.escape("`") == "``"
        ), "Escaping the escape character should duplicate it"
        assert (
            escaper.escape("abc123`def") == "abc`1`2`3``def"
        ), "Escaping 'abc123`def' did not produce the expected result"

        assert escaper.unescape("`1") == "1", "Unescaping '`1' should produce '1'"
        assert escaper.unescape("``") == "`", "Unescaping '``' should produce '`'"
        assert (
            escaper.unescape("abc`1`2`3``def") == "abc123`def"
        ), "Unescaping 'abc`1`2`3``def' did not produce the expected result"

    def test_performance_optimization_specific(self):
        """Pair-based policy: identity fast path plus basic conversions."""
        escaper = PerCharacterEscaper.specified_escape("`a1b2c3d")
        abc = "abc"
        # 'is' checks object identity.
        assert (
            escaper.escape(abc) is abc
        ), "Escape should return the original object when no change is made"
        assert (
            escaper.unescape(abc) is abc
        ), "Unescape should return the original object when no change is made"

        # '==' checks value equality.
        assert escaper.escape("1") == "`b", "Escaping '1' should produce '`b'"
        assert escaper.escape("`") == "`a", "Escaping '`' should produce '`a'"
        assert (
            escaper.escape("abc123`def") == "abc`b`c`d`adef"
        ), "Escaping 'abc123`def' did not produce the expected result"

        assert escaper.unescape("`b") == "1", "Unescaping '`b' should produce '1'"
        assert escaper.unescape("`a") == "`", "Unescaping '`a' should produce '`'"
        assert (
            escaper.unescape("abc`1`2`3``def") == "abc123`def"
        ), "Unescaping 'abc`1`2`3``def' did not produce the expected result"

    def test_corner_cases_self(self):
        """Self-escaping policy: lone escape character and unknown follower."""
        escaper = PerCharacterEscaper.self_escape("`123")
        with pytest.raises(ValueError) as excinfo:
            escaper.unescape("`")
        assert (
            str(excinfo.value)
            == "Escape character '`' can't be the last character in a string."
        ), "Unescaping a string ending with a single escape character should raise ValueError"
        assert escaper.unescape("`a") == "a", "Unescaping '`a' should produce 'a'"

    def test_corner_cases_specific(self):
        """Pair-based policy: lone escape character and unknown follower."""
        escaper = PerCharacterEscaper.specified_escape("`a1b2c3d")
        with pytest.raises(ValueError) as excinfo:
            escaper.unescape("`")
        assert (
            str(excinfo.value)
            == "Escape character '`' can't be the last character in a string."
        ), "Unescaping a string ending with a single escape character should raise ValueError"
        assert escaper.unescape("`e") == "e", "Unescaping '`e' should produce 'e'"

    def test_roundtrip(self):
        """Escaping and then unescaping must give back the original string."""
        escaper = PerCharacterEscaper.self_escape("`<>")

        # Parameter is named `text` so the builtin `str` is not shadowed.
        def roundtrip(text):
            assert (
                escaper.unescape(escaper.escape(text)) == text
            ), f"Roundtrip of '{text}' did not return the original string"

        roundtrip("")
        roundtrip("<local>~`/")