Skip to content
This repository has been archived by the owner on Apr 4, 2024. It is now read-only.

Creating SourceFile #20

Merged
merged 19 commits into from
Mar 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions python/selfie-lib/selfie_lib/EscapeLeadingWhitespace.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from enum import Enum, auto


class EscapeLeadingWhitespace(Enum):
    """Policy for escaping leading whitespace; ``NEVER`` is the only member."""

    NEVER = auto()

    def escape_line(self, line: str, space: str, tab: str) -> str:
        """Return *line* exactly as given; *space* and *tab* are not consulted."""
        unchanged = line
        return unchanged

    @staticmethod
    def appropriate_for(file_content: str) -> "EscapeLeadingWhitespace":
        """Return the policy to use for *file_content*, which is always ``NEVER``."""
        return EscapeLeadingWhitespace["NEVER"]
59 changes: 59 additions & 0 deletions python/selfie-lib/selfie_lib/Literals.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from enum import Enum, auto
from typing import Protocol, TypeVar
from abc import abstractmethod
from .EscapeLeadingWhitespace import EscapeLeadingWhitespace

# Placeholder for the value type carried by LiteralValue and handled by
# LiteralFormat implementations (e.g. bool for LiteralBoolean).
T = TypeVar("T")


class Language(Enum):
    """Source languages that can be derived from a filename."""

    PYTHON = auto()

    @classmethod
    def from_filename(cls, filename: str) -> "Language":
        """Return the language implied by the extension of *filename*.

        Only the text after the last ``.`` counts as the extension. A name
        without any ``.`` has no extension, so a file literally called ``py``
        is rejected instead of being mistaken for a Python file.

        Raises:
            ValueError: if the extension is missing or not recognised.
        """
        # rpartition yields an empty separator when there is no dot at all,
        # which lets us tell "py" (no extension) apart from "x.py".
        _, dot, extension = filename.rpartition(".")
        if dot and extension == "py":
            return cls.PYTHON
        raise ValueError(f"Unknown language for file {filename}")


class LiteralValue:
    """Bundle of an expected value, an actual value, and the format for them.

    Attributes are stored exactly as passed in; no validation or conversion
    is performed.
    """

    def __init__(self, expected: T | None, actual: T, format: "LiteralFormat") -> None:
        # One tuple assignment instead of three statements; same attributes.
        self.expected, self.actual, self.format = expected, actual, format


class LiteralFormat(Protocol[T]):
    """Interface for converting a value of type ``T`` to and from source text."""

    @abstractmethod
    def encode(
        self,
        value: T,
        language: Language,
        encoding_policy: "EscapeLeadingWhitespace",
    ) -> str:
        """Return the source-code text for *value* in *language*."""
        message = "Subclasses must implement the encode method"
        raise NotImplementedError(message)

    @abstractmethod
    def parse(self, string: str, language: Language) -> T:
        """Return the value that *string* denotes in *language*."""
        message = "Subclasses must implement the parse method"
        raise NotImplementedError(message)


# Numeric threshold and the derived padding width: PADDING_SIZE is the number
# of decimal digits in MAX_RAW_NUMBER minus one (3 for 1000).
# NOTE(review): neither constant is referenced by the code visible here;
# presumably reserved for numeric literal formatting - confirm.
MAX_RAW_NUMBER = 1_000
PADDING_SIZE = len(f"{MAX_RAW_NUMBER}") - 1


class LiteralBoolean(LiteralFormat[bool]):
    """Literal format for ``bool`` values."""

    def encode(
        self, value: bool, language: Language, encoding_policy: EscapeLeadingWhitespace
    ) -> str:
        """Return ``str(value)``; *language* and *encoding_policy* are unused."""
        text = str(value)
        return text

    def __to_boolean_strict(self, string: str) -> bool:
        """Map "true"/"false" (compared case-insensitively) to a bool.

        Raises:
            ValueError: for any other text.
        """
        lowered = string.lower()
        if lowered not in ("true", "false"):
            raise ValueError("String is not a valid boolean representation: " + string)
        return lowered == "true"

    def parse(self, string: str, language: Language) -> bool:
        """Parse *string* into a bool; *language* is unused."""
        return self.__to_boolean_strict(string)
188 changes: 188 additions & 0 deletions python/selfie-lib/selfie_lib/SourceFile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
from .Slice import Slice
from .Literals import Language, LiteralFormat, LiteralValue
from .EscapeLeadingWhitespace import EscapeLeadingWhitespace
from typing import Any


class SourceFile:
    """In-memory source file that can locate and rewrite inline assertions.

    The content is held as a ``Slice`` with CRLF sequences normalised to LF.
    ``as_string`` converts back to CRLF when the original content contained
    a carriage return.
    """

    TRIPLE_QUOTE = '"""'

    def __init__(self, filename: str, content: str) -> None:
        """Wrap *content*; the language is derived from *filename*.

        Raises:
            ValueError: propagated from ``Language.from_filename`` when the
                filename's language is unknown.
        """
        # Remember the newline style so as_string can restore it.
        self.__unix_newlines: bool = "\r" not in content
        self.__content_slice: Slice = Slice(content.replace("\r\n", "\n"))
        self.__language: Language = Language.from_filename(filename)
        self.__escape_leading_whitespace = EscapeLeadingWhitespace.appropriate_for(
            self.__content_slice.__str__()
        )

    @property
    def as_string(self) -> str:
        """Current content, using CRLF newlines if the original had any CR."""
        text = self.__content_slice.__str__()
        return text if self.__unix_newlines else text.replace("\n", "\r\n")

    def _replace_content(self, new_content: str) -> None:
        """Install *new_content* (LF newlines) as the whole file content.

        Exists so that ``ToBeLiteral`` can update its owning file: inside the
        nested class ``self.__content_slice`` would be name-mangled for
        ``ToBeLiteral`` and never reach this object.
        """
        self.__content_slice = Slice(new_content)

    class ToBeLiteral:
        """One inline assertion call (for example ``.toBe(7)``) found in a file."""

        def __init__(
            self,
            dot_fun_open_paren: str,
            function_call_plus_arg: Slice,
            arg: Slice,
            language: Language,
            escape_leading_whitespace: EscapeLeadingWhitespace,
            parent: "SourceFile | None" = None,
        ) -> None:
            """Store the pieces of the parsed call.

            Args:
                dot_fun_open_paren: call prefix to emit when rewriting,
                    e.g. ``.toBe(``.
                function_call_plus_arg: slice covering the call through its
                    closing parenthesis.
                arg: slice covering only the literal argument.
                language: language passed to the literal format.
                escape_leading_whitespace: policy passed to the literal format.
                parent: file whose content is replaced when the literal is
                    rewritten; when omitted, rewriting only reports the
                    newline delta.
            """
            self.__dot_fun_open_paren = dot_fun_open_paren
            self.__function_call_plus_arg = function_call_plus_arg
            self.__arg = arg
            self.__language = language
            self.__escape_leading_whitespace = escape_leading_whitespace
            self.__parent = parent

        def _get_function_call_plus_arg(self):
            """Return the slice covering the whole call (used by tests)."""
            return self.__function_call_plus_arg

        def _get_arg(self):
            """Return the slice covering only the argument (used by tests)."""
            return self.__arg

        def set_literal_and_get_newline_delta(self, literal_value: LiteralValue) -> int:
            """Rewrite the call with the encoded actual value.

            Returns:
                Newlines in the encoded value minus newlines in the call
                being replaced.

            Raises:
                ValueError: if encoding and then parsing the actual value
                    does not give back an equal value.
            """
            encoded = literal_value.format.encode(
                literal_value.actual, self.__language, self.__escape_leading_whitespace
            )
            round_tripped = literal_value.format.parse(encoded, self.__language)
            if round_tripped != literal_value.actual:
                raise ValueError(
                    f"There is an error in {literal_value.format.__class__.__name__}, "
                    "the following value isn't round tripping.\n"
                    f"Please report this error and the data below at "
                    "https://github.com/diffplug/selfie/issues/new\n"
                    f"```\n"
                    f"ORIGINAL\n{literal_value.actual}\n"
                    f"ROUNDTRIPPED\n{round_tripped}\n"
                    f"ENCODED ORIGINAL\n{encoded}\n"
                    f"```\n"
                )
            existing_newlines = self.__function_call_plus_arg.count("\n")
            new_newlines = encoded.count("\n")
            # NOTE(review): assumes Slice.replaceSelfWith returns the full
            # rewritten base text as a str - confirm against Slice.
            new_content = self.__function_call_plus_arg.replaceSelfWith(
                f"{self.__dot_fun_open_paren}{encoded})"
            )
            if self.__parent is not None:
                self.__parent._replace_content(new_content)
            return new_newlines - existing_newlines

        def parse_literal(self, literal_format: LiteralFormat) -> Any:
            """Parse the argument text with *literal_format*."""
            return literal_format.parse(self.__arg.__str__(), self.__language)

    def find_on_line(self, to_find: str, line_one_indexed: int) -> Slice:
        """Return the slice of the first *to_find* on the given 1-based line.

        Raises:
            AssertionError: if the line does not contain *to_find*.
        """
        line_content = self.__content_slice.unixLine(line_one_indexed)
        idx = line_content.indexOf(to_find)
        if idx == -1:
            raise AssertionError(
                f"Expected to find `{to_find}` on line {line_one_indexed}, "
                f"but there was only `{line_content}`"
            )
        return line_content.subSequence(idx, idx + len(to_find))

    def replace_on_line(self, line_one_indexed: int, find: str, replace: str) -> None:
        """Replace every *find* with *replace* on the given 1-based line.

        Neither string may contain a newline.
        """
        assert "\n" not in find
        assert "\n" not in replace
        line_slice = self.__content_slice.unixLine(line_one_indexed)
        new_line = line_slice.__str__().replace(find, replace)
        # Replace only the line's span. Calling replaceSelfWith on the
        # whole-content slice would discard every other line.
        self.__content_slice = Slice(line_slice.replaceSelfWith(new_line))

    def parse_to_be_like(self, line_one_indexed: int) -> ToBeLiteral:
        """Parse the inline assertion that starts on the given 1-based line.

        The argument must be empty, a double-quoted string, a
        triple-double-quoted string, or a run of non-space characters, and
        only whitespace may separate it from the closing parenthesis.

        Raises:
            AssertionError: if no ``TO_BE_LIKES`` prefix is on the line, the
                call or a string literal is unclosed, or something other than
                a single literal sits between the parentheses.
        """
        line_content = self.__content_slice.unixLine(line_one_indexed)
        dot_fun_open_paren = None
        dot_function_call_in_place = -1
        for to_be_like in TO_BE_LIKES:
            idx = line_content.indexOf(to_be_like)
            if idx != -1:
                dot_fun_open_paren = to_be_like
                dot_function_call_in_place = idx
                break
        if dot_fun_open_paren is None:
            raise AssertionError(
                f"Expected to find inline assertion on line {line_one_indexed}, but there was only `{line_content}`"
            )

        content = self.__content_slice
        content_len = len(content)
        # Convert the in-line offset to an offset into the whole content.
        dot_function_call = dot_function_call_in_place + line_content.startIndex
        arg_start = dot_function_call + len(dot_fun_open_paren)

        # Skip whitespace (possibly spanning lines) before the argument.
        if content_len == arg_start:
            raise AssertionError(
                f"Appears to be an unclosed function call `{dot_fun_open_paren})` "
                f"on line {line_one_indexed}"
            )
        while content[arg_start].isspace():
            arg_start += 1
            if content_len == arg_start:
                raise AssertionError(
                    f"Appears to be an unclosed function call `{dot_fun_open_paren})` "
                    f"on line {line_one_indexed}"
                )

        if content[arg_start] == '"':
            # A single character can never start with three quotes, so test
            # the text beginning at arg_start instead.
            if content.__str__().startswith(self.TRIPLE_QUOTE, arg_start):
                end_arg = content.indexOf(
                    self.TRIPLE_QUOTE, arg_start + len(self.TRIPLE_QUOTE)
                )
                if end_arg == -1:
                    raise AssertionError(
                        f"Appears to be an unclosed multiline string literal `{self.TRIPLE_QUOTE}` "
                        f"on line {line_one_indexed}"
                    )
                end_arg += len(self.TRIPLE_QUOTE)
            else:
                end_arg = arg_start + 1
                while True:
                    # Bounds check comes first so an opening quote at the very
                    # end of the file reports "unclosed" instead of indexing
                    # past the end.
                    if end_arg == content_len:
                        raise AssertionError(
                            f'Appears to be an unclosed string literal `"` '
                            f"on line {line_one_indexed}"
                        )
                    # A quote directly preceded by a backslash does not close
                    # the literal.
                    if content[end_arg] == '"' and content[end_arg - 1] != "\\":
                        break
                    end_arg += 1
                end_arg += 1  # include the closing quote
        else:
            # Bare literal (or empty argument): runs to whitespace or ")".
            end_arg = arg_start
            while not content[end_arg].isspace():
                if content[end_arg] == ")":
                    break
                end_arg += 1
                if end_arg == content_len:
                    raise AssertionError(
                        f"Appears to be an unclosed numeric literal "
                        f"on line {line_one_indexed}"
                    )

        # Only whitespace may sit between the literal and the closing paren.
        end_paren = end_arg
        while True:
            if end_paren == content_len:
                raise AssertionError(
                    f"Appears to be an unclosed function call `{dot_fun_open_paren})` "
                    f"starting at line {line_one_indexed}"
                )
            current = content[end_paren]
            if current == ")":
                break
            if not current.isspace():
                raise AssertionError(
                    f"Non-primitive literal in `{dot_fun_open_paren})` starting at "
                    f"line {line_one_indexed}: error for character "
                    f"`{current}` on line "
                    f"{content.baseLineAtOffset(end_paren)}"
                )
            end_paren += 1

        return self.ToBeLiteral(
            # Rewrites always emit the non-TODO spelling of the call.
            dot_fun_open_paren.replace("_TODO", ""),
            content.subSequence(dot_function_call, end_paren + 1),
            content.subSequence(arg_start, end_arg),
            self.__language,
            self.__escape_leading_whitespace,
            parent=self,
        )


# Call prefixes that SourceFile.parse_to_be_like looks for on a line, tried in
# this order; the first one found wins.
TO_BE_LIKES = [
    ".toBe(",
    ".toBe_TODO(",
    ".toBeBase64(",
    ".toBeBase64_TODO(",
]
1 change: 1 addition & 0 deletions python/selfie-lib/selfie_lib/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .LineReader import LineReader as LineReader
from .Slice import Slice as Slice
from .SourceFile import SourceFile as SourceFile
from .PerCharacterEscaper import PerCharacterEscaper as PerCharacterEscaper
90 changes: 90 additions & 0 deletions python/selfie-lib/tests/SourceFile_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
from selfie_lib import SourceFile


def python_test(source_raw, function_call_plus_arg_raw, arg_raw=None):
    """Parse line 1 of *source_raw* and check the call and argument found.

    Single quotes in every argument are converted to double quotes first, so
    test cases can be written without escaping.

    Two call shapes are supported:

    * ``python_test(source, expected_call, expected_arg)``
    * ``python_test(source, expected_arg)``, where the expected call is the
      whole source

    Raises:
        AssertionError: from the parser, or when the parsed call or argument
            text differs from the expectation.
    """
    if arg_raw is None:
        # Two-argument form: the second positional value is the expected arg.
        function_call_plus_arg_raw, arg_raw = source_raw, function_call_plus_arg_raw
    source = source_raw.replace("'", '"')
    function_call_plus_arg = function_call_plus_arg_raw.replace("'", '"')
    arg = arg_raw.replace("'", '"')
    parsed = SourceFile("UnderTest.py", source)
    to_be_literal = parsed.parse_to_be_like(1)
    # Compare text, not Slice objects, so the check does not depend on how
    # Slice defines equality against plain strings.
    assert str(to_be_literal._get_function_call_plus_arg()) == function_call_plus_arg
    assert str(to_be_literal._get_arg()) == arg


def python_test_error(source_raw, error_msg):
    """Check that parsing *source_raw* fails with exactly *error_msg*.

    Raises:
        AssertionError: if the failure message differs from *error_msg*, or
            if parsing does not fail at all.
    """
    try:
        python_test(source_raw, "unusedArg")
    except AssertionError as e:
        assert str(e) == error_msg
    else:
        # Without this branch a parser that wrongly accepts the input would
        # make the check pass silently.
        raise AssertionError(
            f"Expected AssertionError `{error_msg}` but no error was raised"
        )


def todo():
    """Check parsing of ``.toBe_TODO(...)`` calls that have an empty argument.

    NOTE(review): the name has no ``test_`` prefix, so default pytest
    discovery would presumably not collect it - confirm this is intended.
    """
    cases = (
        (".toBe_TODO()", ".toBe_TODO()"),
        (" .toBe_TODO() ", ".toBe_TODO()"),
        (" .toBe_TODO( ) ", ".toBe_TODO( )"),
        (" .toBe_TODO( \n ) ", ".toBe_TODO( \n )"),
    )
    for source, expected_call in cases:
        python_test(source, expected_call, "")


def numeric():
    """Check parsing of ``.toBe(7)`` with varying surrounding whitespace.

    NOTE(review): the name has no ``test_`` prefix, so default pytest
    discovery would presumably not collect it - confirm this is intended.
    """
    cases = (
        (".toBe(7)", ".toBe(7)"),
        (" .toBe(7)", ".toBe(7)"),
        (".toBe(7) ", ".toBe(7)"),
        (" .toBe(7) ", ".toBe(7)"),
        (" .toBe( 7 ) ", ".toBe( 7 )"),
        (" .toBe(\n7) ", ".toBe(\n7)"),
        (" .toBe(7\n) ", ".toBe(7\n)"),
    )
    for source, expected_call in cases:
        python_test(source, expected_call, "7")


def single_line_string():
    """Run the single-quoted string cases through ``python_test``.

    Each case passes two arguments: the source and a quoted literal.

    NOTE(review): the name has no ``test_`` prefix, so default pytest
    discovery would presumably not collect it - confirm this is intended.
    """
    cases = (
        (".toBe('7')", "'7'"),
        (".toBe('')", "''"),
        (".toBe( '' )", "''"),
        (".toBe( \n '' \n )", "''"),
        (".toBe( \n '78' \n )", "'78'"),
        (".toBe('\\'')", "'\\''"),
    )
    for source, literal in cases:
        python_test(source, literal)


def multi_line_string():
    """Run the triple-quoted string cases through ``python_test``.

    Each case passes two arguments: the source and a quoted literal.

    NOTE(review): the name has no ``test_`` prefix, so default pytest
    discovery would presumably not collect it - confirm this is intended.
    """
    cases = (
        (".toBe('''7''')", "'''7'''"),
        (".toBe(''' 7 ''')", "''' 7 '''"),
        (".toBe('''\n7\n''')", "'''\n7\n'''"),
        (".toBe(''' ' '' ' ''')", "''' ' '' ' '''"),
    )
    for source, literal in cases:
        python_test(source, literal)


def error_unclosed():
    """Check the messages reported for unclosed calls and string literals.

    NOTE(review): the name has no ``test_`` prefix, so default pytest
    discovery would presumably not collect it - confirm this is intended.
    """
    cases = (
        (".toBe(", "Appears to be an unclosed function call `.toBe()` on line 1"),
        (
            ".toBe( \n ",
            "Appears to be an unclosed function call `.toBe()` on line 1",
        ),
        (
            ".toBe_TODO(",
            "Appears to be an unclosed function call `.toBe_TODO()` on line 1",
        ),
        (
            ".toBe_TODO( \n ",
            "Appears to be an unclosed function call `.toBe_TODO()` on line 1",
        ),
        (".toBe_TODO(')", 'Appears to be an unclosed string literal `"` on line 1'),
        (
            ".toBe_TODO(''')",
            'Appears to be an unclosed multiline string literal `"""` on line 1',
        ),
    )
    for source, expected_message in cases:
        python_test_error(source, expected_message)


def error_non_primitive():
    """Check that expressions combining literals with ``+`` are rejected.

    NOTE(review): the name has no ``test_`` prefix, so default pytest
    discovery would presumably not collect it - confirm this is intended.
    """
    expected_message = "Non-primitive literal in `.toBe()` starting at line 1: error for character `+` on line 1"
    sources = (
        ".toBe(1 + 1)",
        ".toBe('1' + '1')",
        ".toBe('''1''' + '''1''')",
    )
    for source in sources:
        python_test_error(source, expected_message)