From efd204d319fdeb80f1fb695c1ddc5344467f069d Mon Sep 17 00:00:00 2001 From: Zac Hatfield-Dodds Date: Sat, 13 Jan 2024 22:10:06 +1100 Subject: [PATCH] Use new efficient regex --- CHANGELOG.md | 4 ++++ deps/check.txt | 30 +++++++++++++++--------------- deps/test.txt | 22 +++++++++++----------- setup.py | 2 +- src/hypothesmith/cst.py | 22 +++++++++++----------- src/hypothesmith/syntactic.py | 18 ++---------------- 6 files changed, 44 insertions(+), 54 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e937948..97f0b14 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +### 0.3.2 - 2024-01-13 +- Performance improvements based on + [Hypothesis 6.93.0](https://hypothesis.readthedocs.io/en/latest/changes.html#v6-93-0) + ### 0.3.1 - 2023-09-06 - Hypothesis >= 6.89.0 made some internal changes which broke our `from_grammar()`. This patch restores compatibility, and requires the new Hypothesis. diff --git a/deps/check.txt b/deps/check.txt index a6bbd59..eeb656b 100644 --- a/deps/check.txt +++ b/deps/check.txt @@ -4,23 +4,23 @@ # # pip-compile --output-file=deps/check.txt deps/check.in # -attrs==23.1.0 +attrs==23.2.0 # via # flake8-bugbear # hypothesis autoflake==2.2.1 # via shed -bandit==1.7.5 +bandit==1.7.6 # via flake8-bandit -black==23.11.0 +black==23.12.1 # via shed click==8.1.7 # via black com2ann==0.3.0 # via shed -exceptiongroup==1.1.3 +exceptiongroup==1.2.0 # via hypothesis -flake8==6.1.0 +flake8==7.0.0 # via # -r deps/check.in # flake8-bandit @@ -29,7 +29,7 @@ flake8==6.1.0 # flake8-docstrings flake8-bandit==4.1.1 # via -r deps/check.in -flake8-bugbear==23.9.16 +flake8-bugbear==23.12.2 # via -r deps/check.in flake8-comprehensions==3.14.0 # via -r deps/check.in @@ -37,11 +37,11 @@ flake8-docstrings==1.7.0 # via -r deps/check.in gitdb==4.0.11 # via gitpython -gitpython==3.1.40 +gitpython==3.1.41 # via bandit -hypothesis==6.89.0 +hypothesis==6.93.0 # via -r deps/check.in -isort==5.12.0 +isort==5.13.2 # via shed libcst==1.1.0 # via shed @@ -51,7 +51,7 @@ mccabe==0.7.0 # via flake8 mdurl==0.1.2 # via markdown-it-py -mypy==1.7.0 +mypy==1.8.0 # via -r deps/check.in mypy-extensions==1.0.0 # via @@ -60,21 +60,21 @@ mypy-extensions==1.0.0 # typing-inspect packaging==23.2 # via black -pathspec==0.11.2 +pathspec==0.12.1 # via black pbr==6.0.0 # via stevedore -platformdirs==4.0.0 +platformdirs==4.1.0 # via black pycodestyle==2.11.1 # via flake8 pydocstyle==6.3.0 # via flake8-docstrings -pyflakes==3.1.0 +pyflakes==3.2.0 # via # autoflake # flake8 -pygments==2.16.1 +pygments==2.17.2 # via rich pyupgrade==3.15.0 # via shed @@ -101,7 +101,7 @@ tomli==2.0.1 # autoflake # black # mypy -typing-extensions==4.8.0 +typing-extensions==4.9.0 # via # black # libcst diff --git a/deps/test.txt b/deps/test.txt index abf3168..66cc169 100644 --- a/deps/test.txt +++ b/deps/test.txt @@ -4,25 +4,25 @@ # # pip-compile --output-file=deps/test.txt deps/test.in setup.py # -attrs==23.1.0 +attrs==23.2.0 # via hypothesis -black==23.11.0 +black==23.12.1 # via -r deps/test.in click==8.1.7 # via black -coverage[toml]==7.3.2 +coverage[toml]==7.4.0 # via pytest-cov -exceptiongroup==1.1.3 +exceptiongroup==1.2.0 # via # hypothesis # pytest execnet==2.0.2 # via pytest-xdist -hypothesis[lark]==6.89.0 +hypothesis[lark]==6.93.0 # via hypothesmith (setup.py) iniconfig==2.0.0 # via pytest -lark==1.1.8 +lark==1.1.9 # via hypothesis libcst==1.1.0 # via hypothesmith (setup.py) @@ -36,20 +36,20 @@ packaging==23.2 # pytest parso==0.8.3 # via -r deps/test.in -pathspec==0.11.2 +pathspec==0.12.1 # via black -platformdirs==4.0.0 +platformdirs==4.1.0 # via black pluggy==1.3.0 # via pytest -pytest==7.4.3 +pytest==7.4.4 # via # -r deps/test.in # pytest-cov # pytest-xdist pytest-cov==4.1.0 # via -r deps/test.in -pytest-xdist==3.4.0 +pytest-xdist==3.5.0 # via -r deps/test.in pyyaml==6.0.1 # via libcst @@ -60,7 +60,7 @@ tomli==2.0.1 # black # coverage # pytest -typing-extensions==4.8.0 +typing-extensions==4.9.0 # via # black # libcst diff --git a/setup.py b/setup.py index a104b7d..4fda72d 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ def local_file(name: str) -> str: license="MPL 2.0", description="Hypothesis strategies for generating Python programs, something like CSmith", zip_safe=False, - install_requires=["hypothesis[lark]>=6.84.1", "libcst>=1.0.1"], + install_requires=["hypothesis[lark]>=6.93.0", "libcst>=1.0.1"], python_requires=">=3.8", classifiers=[ "Development Status :: 4 - Beta", diff --git a/src/hypothesmith/cst.py b/src/hypothesmith/cst.py index 5cc886b..f391927 100644 --- a/src/hypothesmith/cst.py +++ b/src/hypothesmith/cst.py @@ -21,6 +21,13 @@ from libcst._nodes.expression import ExpressionPosition from libcst._nodes.statement import _INDENT_WHITESPACE_RE +from .syntactic import ALLOWED_CHARS + + +def py_from_regex(pattern): + return st.from_regex(pattern, fullmatch=True, alphabet=ALLOWED_CHARS) + + # For some nodes, we just need to ensure that they use the appropriate regex # pattern instead of allowing literally any string. for node_type, pattern in { @@ -29,11 +36,11 @@ libcst.Imaginary: IMAGNUMBER_RE, libcst.SimpleWhitespace: libcst._nodes.whitespace.SIMPLE_WHITESPACE_RE, }.items(): - _strategy = st.builds(node_type, st.from_regex(pattern, fullmatch=True)) + _strategy = st.builds(node_type, py_from_regex(pattern)) st.register_type_strategy(node_type, _strategy) # type-ignore comments are special in the 3.8+ (typed) ast, so boost their chances) -_comments = st.from_regex(libcst._nodes.whitespace.COMMENT_RE, fullmatch=True) +_comments = py_from_regex(libcst._nodes.whitespace.COMMENT_RE) st.register_type_strategy( libcst.Comment, st.builds(libcst.Comment, _comments | st.just("# type: ignore")) ) @@ -68,9 +75,7 @@ def nonempty_seq(*node: Type[libcst.CSTNode]) -> st.SearchStrategy: # inference to provide most of our arguments for us. # However, in some cases we want to either restrict arguments (e.g. libcst.Name), # or supply something nastier than the default argument (e.g. libcst.SimpleWhitespace) -nonempty_whitespace = st.builds( - libcst.SimpleWhitespace, st.from_regex(" +", fullmatch=True) -) +nonempty_whitespace = st.builds(libcst.SimpleWhitespace, py_from_regex(" +")) REGISTERED = ( [libcst.Asynchronous, nonempty_whitespace], [libcst.AsName, st.from_type(libcst.Name)], @@ -89,12 +94,7 @@ def nonempty_seq(*node: Type[libcst.CSTNode]) -> st.SearchStrategy: st.from_type(libcst.Name) | st.from_type(libcst.Attribute), nonempty_seq(libcst.ImportAlias), ], - [ - libcst.IndentedBlock, - infer, - infer, - st.from_regex(_INDENT_WHITESPACE_RE, fullmatch=True), - ], + [libcst.IndentedBlock, infer, infer, py_from_regex(_INDENT_WHITESPACE_RE)], [libcst.IsNot, infer, nonempty_whitespace, infer], [ libcst.MatchSingleton, diff --git a/src/hypothesmith/syntactic.py b/src/hypothesmith/syntactic.py index 654d8e7..ad99e05 100644 --- a/src/hypothesmith/syntactic.py +++ b/src/hypothesmith/syntactic.py @@ -25,6 +25,7 @@ "simple_stmt": "single", "compound_stmt": "single", } +ALLOWED_CHARS = st.characters(codec="utf-8", min_codepoint=1) class PythonIndenter(Indenter): @@ -37,17 +38,6 @@ class PythonIndenter(Indenter): tab_len = 4 -def utf8_encodable(terminal: str) -> bool: - try: - terminal.encode() - return True - except UnicodeEncodeError: # pragma: no cover - # Very rarely, a "." in some terminal regex will generate a surrogate - # character that cannot be encoded as UTF-8. We apply this filter to - # ensure it doesn't happen at runtime, but don't worry about coverage. - return False - - class GrammarStrategy(LarkStrategy): def __init__(self, grammar: Lark, start: str, auto_target: bool): explicit_strategies = { @@ -55,11 +45,7 @@ def __init__(self, grammar: Lark, start: str, auto_target: bool): PythonIndenter.DEDENT_type: st.just(""), "NAME": st.text().filter(str.isidentifier), } - super().__init__(grammar, start, explicit_strategies) - self.terminal_strategies = { - k: v.map(lambda s: s.replace("\0", "")).filter(utf8_encodable) - for k, v in self.terminal_strategies.items() # type: ignore - } + super().__init__(grammar, start, explicit_strategies, alphabet=ALLOWED_CHARS) self.auto_target = auto_target and start != "single_input" def do_draw(self, data): # type: ignore