From b40e8991abeb4a5a4400d617ffeea4e0519b4dd9 Mon Sep 17 00:00:00 2001 From: Selina Delgado Date: Thu, 28 Mar 2024 10:47:05 -0700 Subject: [PATCH 1/4] string done but multistring test having problems --- python/selfie-lib/selfie_lib/Literals.py | 186 ++++++++++++++++++ python/selfie-lib/tests/LiteralString_test.py | 62 ++++++ 2 files changed, 248 insertions(+) create mode 100644 python/selfie-lib/tests/LiteralString_test.py diff --git a/python/selfie-lib/selfie_lib/Literals.py b/python/selfie-lib/selfie_lib/Literals.py index cd27e92e..5458efc6 100644 --- a/python/selfie-lib/selfie_lib/Literals.py +++ b/python/selfie-lib/selfie_lib/Literals.py @@ -3,6 +3,7 @@ from abc import abstractmethod from .EscapeLeadingWhitespace import EscapeLeadingWhitespace import io +import re T = TypeVar("T") @@ -71,6 +72,191 @@ def parse(self, string: str, language: Language) -> int: return int(string.replace("_", "")) +TRIPLE_QUOTE = '"""' + + +class LiteralString(LiteralFormat[str]): + def encode( + self, value: str, language: Language, encoding_policy: EscapeLeadingWhitespace + ) -> str: + if "/n" not in value: + if language == Language.PYTHON: + return self._encodeSinglePython(value) + else: + if language == Language.PYTHON: + return self.encodeMultiPython(value, encoding_policy) + + def parse(self, string: str, language: Language) -> str: + if not string.startswith(TRIPLE_QUOTE): + if language == Language.PYTHON: + return self._parseSinglePython(string) + else: + if language == Language.PYTHON: + return self.parseMultiPython(string) + + def _encodeSinglePython(self, value: str) -> str: + source = io.StringIO() + source.write('"') + for char in value: + if char == "\b": + source.write("\\b") + elif char == "\n": + source.write("\\n") + elif char == "\r": + source.write("\\r") + elif char == "\t": + source.write("\\t") + elif char == '"': + source.write('\\"') + elif char == "\\": + source.write("\\\\") + elif self._is_control_char(char): + source.write("\\u" + str(ord(char)).zfill(4)) + else: + source.write(char) + source.write('"') + return source.getvalue() + + def _is_control_char(self, c: str) -> bool: + return c in "\u0000\u001f" or c == "\u007f" + + # combined logic from parseSingleJava and parseSingleJavaish + def _parseSinglePython(self, source_with_quotes: str) -> str: + assert source_with_quotes.startswith('"') + assert source_with_quotes.endswith('"') + source = source_with_quotes[1:-1] + to_unescape = self.inline_backslashes(source) # changed from inline_dollar + return self._unescape_python(to_unescape) + + def encodeMultiPython( + self, arg: str, escape_leading_whitespace: EscapeLeadingWhitespace + ) -> str: + escape_backslashes = arg.replace("\\", "\\\\") + escape_triple_quotes = escape_backslashes.replace(TRIPLE_QUOTE, '\\"\\"\\"') + + def protect_trailing_whitespace(line): + if line.endswith(" "): + return line[:-1] + "\\s" + elif line.endswith("\t"): + return line[:-1] + "\\t" + else: + return line + + lines = escape_triple_quotes.splitlines() + protect_whitespace = "\n".join( + escape_leading_whitespace.escape_line( + protect_trailing_whitespace(line), "\\s", "\\t" + ) + for line in lines + ) + + common_prefix = min( + (line.lstrip() for line in protect_whitespace.splitlines() if line.strip()), + default="", + ) + if common_prefix: + lines = protect_whitespace.splitlines() + last = lines[-1] + protect_whitespace = "\n".join( + f"\\s{line[1:]}" + if line.startswith(" ") + else f"\\t{line[1:]}" + if line.startswith("\t") + else line + if line != last + else (f"\\s{line[1:]}" if line.startswith(" ") else f"\\t{line[1:]}") + for line in lines + ) + return f"{TRIPLE_QUOTE}\n{protect_whitespace}{TRIPLE_QUOTE}" + + _char_literal_pattern = re.compile(r"""\{'(\\?.)'\}""") + + def inline_backslashes(self, source: str) -> str: + def replace_char(char_literal: str) -> str: + if len(char_literal) == 1: + return char_literal + elif len(char_literal) == 2 and char_literal[0] == "\\": + if char_literal[1] == "t": + return "\t" + elif char_literal[1] == "b": + return "\b" + elif char_literal[1] == "n": + return "\n" + elif char_literal[1] == "r": + return "\r" + elif char_literal[1] == "'": + return "'" + elif char_literal[1] == "\\": + return "\\" + else: + raise ValueError(f"Unknown character literal {char_literal}") + else: + raise ValueError(f"Unknown character literal {char_literal}") + + return self._char_literal_pattern.sub( + lambda match: replace_char(match.group(1)), source + ) + + def _unescape_python(self, source: str) -> str: + value = io.StringIO() + i = 0 + while i < len(source): + c = source[i] + if c == "\\": + i += 1 + c = source[i] + if c == '"': + value.write('"') + elif c == "\\": + value.write("\\") + elif c == "b": + value.write("\b") + elif c == "f": + value.write("\f") + elif c == "n": + value.write("\n") + elif c == "r": + value.write("\r") + elif c == "s": + value.write(" ") + elif c == "t": + value.write("\t") + elif c == "u": + code = int(source[i + 1 : i + 5], 16) + value.write(chr(code)) + i += 4 + else: + raise ValueError(f"Unknown escape sequence {c}") + else: + value.write(c) + i += 1 + return value.getvalue() + + def parseMultiPython(self, source_with_quotes: str) -> str: + assert source_with_quotes.startswith(TRIPLE_QUOTE + "\n") + assert source_with_quotes.endswith(TRIPLE_QUOTE) + + source = source_with_quotes[len(TRIPLE_QUOTE) + 1 : -len(TRIPLE_QUOTE)] + lines = source.split("\n") + + common_prefix = min( + (line[: len(line) - len(line.lstrip())] for line in lines if line.strip()), + default="", + ) + + def remove_common_prefix(line: str) -> str: + return line[len(common_prefix) :] if common_prefix else line + + def handle_escape_sequences(line: str) -> str: + return self._unescape_python(line.rstrip()) + + return "\n".join( + handle_escape_sequences(remove_common_prefix(line)) + for line in lines + if line.strip() + ) + + class LiteralBoolean(LiteralFormat[bool]): def encode( self, value: bool, language: Language, encoding_policy: EscapeLeadingWhitespace diff --git a/python/selfie-lib/tests/LiteralString_test.py b/python/selfie-lib/tests/LiteralString_test.py new file mode 100644 index 00000000..e36ac316 --- /dev/null +++ b/python/selfie-lib/tests/LiteralString_test.py @@ -0,0 +1,62 @@ +import pytest +from selfie_lib.Literals import LiteralString +from selfie_lib.EscapeLeadingWhitespace import EscapeLeadingWhitespace + + +class TestLiteralString: + @pytest.mark.parametrize( + "value, expected", + [("1", '"1"'), ("\\", '"\\\\"'), ("1\n\tABC", '"1\\n\\tABC"')], + ) + def test_encode_single_java(self, value, expected): + literal_string = LiteralString() + actual = literal_string._encodeSinglePython(value) + print(actual) + assert actual == expected + + @pytest.mark.parametrize( + "value, expected", + [("1", "`1`"), ("\\", "`\\\\`"), ("1\n\tABC", "`1\\n\\tABC`")], + ) + def test_encode_single_java_with_dollars(self, value, expected): + literal_string = LiteralString() + actual = literal_string._encodeSinglePython(value) + assert actual == expected.replace("`", '"') + + # Failing due to EscapeLeadingWhitespace always being NEVER + # and not an Always option like in the original test case + @pytest.mark.parametrize( + "value, expected", + [ + ("1", "'''\n1'''"), + ("\\", "'''\n\\\\'''"), + (" leading\ntrailing ", "'''\n" + "\\s leading\n" + "trailing \\s'''"), + ], + ) + def test_encode_multi_java(self, value, expected): + literal_string = LiteralString() + actual = literal_string.encodeMultiPython(value, EscapeLeadingWhitespace.NEVER) + assert actual == expected.replace("'", '"') + + @pytest.mark.parametrize( + "value, expected", [("1", "1"), ("\\\\", "\\"), ("1\\n\\tABC", "1\n\tABC")] + ) + def test_parse_single_java(self, value, expected): + literal_string = LiteralString() + actual = literal_string._parseSinglePython(f'"{value.replace("'", "\"")}"') + assert actual == expected + + @pytest.mark.parametrize( + "value, expected", + [ + ("\n123\nabc", "123\nabc"), + ("\n 123\n abc", "123\nabc"), + ("\n 123 \n abc\t", "123\nabc"), + ("\n 123 \n abc\t", "123\nabc"), + ("\n 123 \\s\n abc\t\\s", "123 \nabc\t "), + ], + ) + def test_parse_multi_java(self, value, expected): + literal_string = LiteralString() + actual = literal_string.parseMultiPython(f'"""{value.replace("'", "\"")}"""') + assert actual == expected From 6a996d6d6f6caebbf90bc263d1846883a0b48f99 Mon Sep 17 00:00:00 2001 From: Selina Delgado Date: Mon, 1 Apr 2024 21:33:49 -0700 Subject: [PATCH 2/4] Removed common prefix and fixed space --- python/selfie-lib/selfie_lib/Literals.py | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/python/selfie-lib/selfie_lib/Literals.py b/python/selfie-lib/selfie_lib/Literals.py index 5458efc6..e6af24ab 100644 --- a/python/selfie-lib/selfie_lib/Literals.py +++ b/python/selfie-lib/selfie_lib/Literals.py @@ -136,7 +136,7 @@ def encodeMultiPython( def protect_trailing_whitespace(line): if line.endswith(" "): - return line[:-1] + "\\s" + return line[:-1] + "\x20" elif line.endswith("\t"): return line[:-1] + "\\t" else: @@ -145,28 +145,11 @@ def protect_trailing_whitespace(line): lines = escape_triple_quotes.splitlines() protect_whitespace = "\n".join( escape_leading_whitespace.escape_line( - protect_trailing_whitespace(line), "\\s", "\\t" + protect_trailing_whitespace(line), "\x20", "\\t" ) for line in lines ) - common_prefix = min( - (line.lstrip() for line in protect_whitespace.splitlines() if line.strip()), - default="", - ) - if common_prefix: - lines = protect_whitespace.splitlines() - last = lines[-1] - protect_whitespace = "\n".join( - f"\\s{line[1:]}" - if line.startswith(" ") - else f"\\t{line[1:]}" - if line.startswith("\t") - else line - if line != last - else (f"\\s{line[1:]}" if line.startswith(" ") else f"\\t{line[1:]}") - for line in lines - ) return f"{TRIPLE_QUOTE}\n{protect_whitespace}{TRIPLE_QUOTE}" _char_literal_pattern = re.compile(r"""\{'(\\?.)'\}""") From 4d4dcf14d65671b9f1c135922bb949406259e8ff Mon Sep 17 00:00:00 2001 From: Selina Delgado Date: Mon, 1 Apr 2024 21:34:47 -0700 Subject: [PATCH 3/4] changed java names and fixed encode multi test --- python/selfie-lib/tests/LiteralString_test.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/python/selfie-lib/tests/LiteralString_test.py b/python/selfie-lib/tests/LiteralString_test.py index e36ac316..dafd51be 100644 --- a/python/selfie-lib/tests/LiteralString_test.py +++ b/python/selfie-lib/tests/LiteralString_test.py @@ -8,7 +8,7 @@ class TestLiteralString: "value, expected", [("1", '"1"'), ("\\", '"\\\\"'), ("1\n\tABC", '"1\\n\\tABC"')], ) - def test_encode_single_java(self, value, expected): + def test_encode_single(self, value, expected): literal_string = LiteralString() actual = literal_string._encodeSinglePython(value) print(actual) @@ -18,7 +18,7 @@ def test_encode_single_java(self, value, expected): "value, expected", [("1", "`1`"), ("\\", "`\\\\`"), ("1\n\tABC", "`1\\n\\tABC`")], ) - def test_encode_single_java_with_dollars(self, value, expected): + def test_encode_single_with_dollars(self, value, expected): literal_string = LiteralString() actual = literal_string._encodeSinglePython(value) assert actual == expected.replace("`", '"') @@ -30,10 +30,13 @@ def test_encode_single_java_with_dollars(self, value, expected): [ ("1", "'''\n1'''"), ("\\", "'''\n\\\\'''"), - (" leading\ntrailing ", "'''\n" + "\\s leading\n" + "trailing \\s'''"), + ( + " leading\ntrailing ", + "'''\n" + "\x20 leading\n" + "trailing \x20'''", + ), ], ) - def test_encode_multi_java(self, value, expected): + def test_encode_multi(self, value, expected): literal_string = LiteralString() actual = literal_string.encodeMultiPython(value, EscapeLeadingWhitespace.NEVER) assert actual == expected.replace("'", '"') @@ -41,7 +44,7 @@ def test_encode_multi_java(self, value, expected): @pytest.mark.parametrize( "value, expected", [("1", "1"), ("\\\\", "\\"), ("1\\n\\tABC", "1\n\tABC")] ) - def test_parse_single_java(self, value, expected): + def test_parse_single(self, value, expected): literal_string = LiteralString() actual = literal_string._parseSinglePython(f'"{value.replace("'", "\"")}"') assert actual == expected @@ -56,7 +59,7 @@ def test_parse_single_java(self, value, expected): ("\n 123 \\s\n abc\t\\s", "123 \nabc\t "), ], ) - def test_parse_multi_java(self, value, expected): + def test_parse_multi(self, value, expected): literal_string = LiteralString() actual = literal_string.parseMultiPython(f'"""{value.replace("'", "\"")}"""') assert actual == expected From dc70f832b730aa499ff813121d3fcba2b2efa305 Mon Sep 17 00:00:00 2001 From: Selina Delgado Date: Thu, 4 Apr 2024 10:54:51 -0700 Subject: [PATCH 4/4] Added the unimplemented throw and updated \x to \u --- python/selfie-lib/selfie_lib/Literals.py | 26 ++++++++++++------- python/selfie-lib/tests/LiteralString_test.py | 2 +- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/python/selfie-lib/selfie_lib/Literals.py b/python/selfie-lib/selfie_lib/Literals.py index e6af24ab..3873178a 100644 --- a/python/selfie-lib/selfie_lib/Literals.py +++ b/python/selfie-lib/selfie_lib/Literals.py @@ -79,20 +79,26 @@ class LiteralString(LiteralFormat[str]): def encode( self, value: str, language: Language, encoding_policy: EscapeLeadingWhitespace ) -> str: - if "/n" not in value: - if language == Language.PYTHON: + if language == Language.PYTHON: + if "/n" not in value: return self._encodeSinglePython(value) - else: - if language == Language.PYTHON: + else: return self.encodeMultiPython(value, encoding_policy) + else: + raise NotImplementedError( + "Encoding for language {} is not implemented.".format(language) + ) def parse(self, string: str, language: Language) -> str: - if not string.startswith(TRIPLE_QUOTE): - if language == Language.PYTHON: + if language == Language.PYTHON: + if not string.startswith(TRIPLE_QUOTE): return self._parseSinglePython(string) - else: - if language == Language.PYTHON: + else: return self.parseMultiPython(string) + else: + raise NotImplementedError( + "Encoding for language {} is not implemented.".format(language) + ) def _encodeSinglePython(self, value: str) -> str: source = io.StringIO() @@ -136,7 +142,7 @@ def encodeMultiPython( def protect_trailing_whitespace(line): if line.endswith(" "): - return line[:-1] + "\x20" + return line[:-1] + "\u0020" elif line.endswith("\t"): return line[:-1] + "\\t" else: @@ -145,7 +151,7 @@ def protect_trailing_whitespace(line): lines = escape_triple_quotes.splitlines() protect_whitespace = "\n".join( escape_leading_whitespace.escape_line( - protect_trailing_whitespace(line), "\x20", "\\t" + protect_trailing_whitespace(line), "\u0020", "\\t" ) for line in lines ) diff --git a/python/selfie-lib/tests/LiteralString_test.py b/python/selfie-lib/tests/LiteralString_test.py index dafd51be..f8a77fb1 100644 --- a/python/selfie-lib/tests/LiteralString_test.py +++ b/python/selfie-lib/tests/LiteralString_test.py @@ -32,7 +32,7 @@ def test_encode_single_with_dollars(self, value, expected): ("\\", "'''\n\\\\'''"), ( " leading\ntrailing ", - "'''\n" + "\x20 leading\n" + "trailing \x20'''", + "'''\n" + "\u0020 leading\n" + "trailing \u0020'''", ), ], )