Skip to content

Commit

Permalink
Use new efficient regex
Browse files Browse the repository at this point in the history
  • Loading branch information
Zac-HD committed Jan 13, 2024
1 parent f4c1cf4 commit efd204d
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 54 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog

### 0.3.2 - 2024-01-13
- Performance improvements based on
[Hypothesis 6.93.0](https://hypothesis.readthedocs.io/en/latest/changes.html#v6-93-0)

### 0.3.1 - 2023-09-06
- Hypothesis >= 6.89.0 made some internal changes which broke our `from_grammar()`.
This patch restores compatibility, and requires the new Hypothesis.
Expand Down
30 changes: 15 additions & 15 deletions deps/check.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,23 @@
#
# pip-compile --output-file=deps/check.txt deps/check.in
#
attrs==23.1.0
attrs==23.2.0
# via
# flake8-bugbear
# hypothesis
autoflake==2.2.1
# via shed
bandit==1.7.5
bandit==1.7.6
# via flake8-bandit
black==23.11.0
black==23.12.1
# via shed
click==8.1.7
# via black
com2ann==0.3.0
# via shed
exceptiongroup==1.1.3
exceptiongroup==1.2.0
# via hypothesis
flake8==6.1.0
flake8==7.0.0
# via
# -r deps/check.in
# flake8-bandit
Expand All @@ -29,19 +29,19 @@ flake8==6.1.0
# flake8-docstrings
flake8-bandit==4.1.1
# via -r deps/check.in
flake8-bugbear==23.9.16
flake8-bugbear==23.12.2
# via -r deps/check.in
flake8-comprehensions==3.14.0
# via -r deps/check.in
flake8-docstrings==1.7.0
# via -r deps/check.in
gitdb==4.0.11
# via gitpython
gitpython==3.1.40
gitpython==3.1.41
# via bandit
hypothesis==6.89.0
hypothesis==6.93.0
# via -r deps/check.in
isort==5.12.0
isort==5.13.2
# via shed
libcst==1.1.0
# via shed
Expand All @@ -51,7 +51,7 @@ mccabe==0.7.0
# via flake8
mdurl==0.1.2
# via markdown-it-py
mypy==1.7.0
mypy==1.8.0
# via -r deps/check.in
mypy-extensions==1.0.0
# via
Expand All @@ -60,21 +60,21 @@ mypy-extensions==1.0.0
# typing-inspect
packaging==23.2
# via black
pathspec==0.11.2
pathspec==0.12.1
# via black
pbr==6.0.0
# via stevedore
platformdirs==4.0.0
platformdirs==4.1.0
# via black
pycodestyle==2.11.1
# via flake8
pydocstyle==6.3.0
# via flake8-docstrings
pyflakes==3.1.0
pyflakes==3.2.0
# via
# autoflake
# flake8
pygments==2.16.1
pygments==2.17.2
# via rich
pyupgrade==3.15.0
# via shed
Expand All @@ -101,7 +101,7 @@ tomli==2.0.1
# autoflake
# black
# mypy
typing-extensions==4.8.0
typing-extensions==4.9.0
# via
# black
# libcst
Expand Down
22 changes: 11 additions & 11 deletions deps/test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,25 @@
#
# pip-compile --output-file=deps/test.txt deps/test.in setup.py
#
attrs==23.1.0
attrs==23.2.0
# via hypothesis
black==23.11.0
black==23.12.1
# via -r deps/test.in
click==8.1.7
# via black
coverage[toml]==7.3.2
coverage[toml]==7.4.0
# via pytest-cov
exceptiongroup==1.1.3
exceptiongroup==1.2.0
# via
# hypothesis
# pytest
execnet==2.0.2
# via pytest-xdist
hypothesis[lark]==6.89.0
hypothesis[lark]==6.93.0
# via hypothesmith (setup.py)
iniconfig==2.0.0
# via pytest
lark==1.1.8
lark==1.1.9
# via hypothesis
libcst==1.1.0
# via hypothesmith (setup.py)
Expand All @@ -36,20 +36,20 @@ packaging==23.2
# pytest
parso==0.8.3
# via -r deps/test.in
pathspec==0.11.2
pathspec==0.12.1
# via black
platformdirs==4.0.0
platformdirs==4.1.0
# via black
pluggy==1.3.0
# via pytest
pytest==7.4.3
pytest==7.4.4
# via
# -r deps/test.in
# pytest-cov
# pytest-xdist
pytest-cov==4.1.0
# via -r deps/test.in
pytest-xdist==3.4.0
pytest-xdist==3.5.0
# via -r deps/test.in
pyyaml==6.0.1
# via libcst
Expand All @@ -60,7 +60,7 @@ tomli==2.0.1
# black
# coverage
# pytest
typing-extensions==4.8.0
typing-extensions==4.9.0
# via
# black
# libcst
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def local_file(name: str) -> str:
license="MPL 2.0",
description="Hypothesis strategies for generating Python programs, something like CSmith",
zip_safe=False,
install_requires=["hypothesis[lark]>=6.84.1", "libcst>=1.0.1"],
install_requires=["hypothesis[lark]>=6.93.0", "libcst>=1.0.1"],
python_requires=">=3.8",
classifiers=[
"Development Status :: 4 - Beta",
Expand Down
22 changes: 11 additions & 11 deletions src/hypothesmith/cst.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,13 @@
from libcst._nodes.expression import ExpressionPosition
from libcst._nodes.statement import _INDENT_WHITESPACE_RE

from .syntactic import ALLOWED_CHARS


def py_from_regex(pattern):
    """Return a strategy for values that fully match *pattern*.

    Thin wrapper around ``st.from_regex`` that always passes
    ``fullmatch=True`` and restricts the generated characters to
    ``ALLOWED_CHARS``, so every regex-based strategy built through this
    helper shares the same alphabet.
    """
    return st.from_regex(pattern, fullmatch=True, alphabet=ALLOWED_CHARS)


# For some nodes, we just need to ensure that they use the appropriate regex
# pattern instead of allowing literally any string.
for node_type, pattern in {
Expand All @@ -29,11 +36,11 @@
libcst.Imaginary: IMAGNUMBER_RE,
libcst.SimpleWhitespace: libcst._nodes.whitespace.SIMPLE_WHITESPACE_RE,
}.items():
_strategy = st.builds(node_type, st.from_regex(pattern, fullmatch=True))
_strategy = st.builds(node_type, py_from_regex(pattern))
st.register_type_strategy(node_type, _strategy)

# type-ignore comments are special in the 3.8+ (typed) ast, so boost their chances
_comments = st.from_regex(libcst._nodes.whitespace.COMMENT_RE, fullmatch=True)
_comments = py_from_regex(libcst._nodes.whitespace.COMMENT_RE)
st.register_type_strategy(
libcst.Comment, st.builds(libcst.Comment, _comments | st.just("# type: ignore"))
)
Expand Down Expand Up @@ -68,9 +75,7 @@ def nonempty_seq(*node: Type[libcst.CSTNode]) -> st.SearchStrategy:
# inference to provide most of our arguments for us.
# However, in some cases we want to either restrict arguments (e.g. libcst.Name),
# or supply something nastier than the default argument (e.g. libcst.SimpleWhitespace)
nonempty_whitespace = st.builds(
libcst.SimpleWhitespace, st.from_regex(" +", fullmatch=True)
)
nonempty_whitespace = st.builds(libcst.SimpleWhitespace, py_from_regex(" +"))
REGISTERED = (
[libcst.Asynchronous, nonempty_whitespace],
[libcst.AsName, st.from_type(libcst.Name)],
Expand All @@ -89,12 +94,7 @@ def nonempty_seq(*node: Type[libcst.CSTNode]) -> st.SearchStrategy:
st.from_type(libcst.Name) | st.from_type(libcst.Attribute),
nonempty_seq(libcst.ImportAlias),
],
[
libcst.IndentedBlock,
infer,
infer,
st.from_regex(_INDENT_WHITESPACE_RE, fullmatch=True),
],
[libcst.IndentedBlock, infer, infer, py_from_regex(_INDENT_WHITESPACE_RE)],
[libcst.IsNot, infer, nonempty_whitespace, infer],
[
libcst.MatchSingleton,
Expand Down
18 changes: 2 additions & 16 deletions src/hypothesmith/syntactic.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
"simple_stmt": "single",
"compound_stmt": "single",
}
ALLOWED_CHARS = st.characters(codec="utf-8", min_codepoint=1)


class PythonIndenter(Indenter):
Expand All @@ -37,29 +38,14 @@ class PythonIndenter(Indenter):
tab_len = 4


def utf8_encodable(terminal: str) -> bool:
    """Return True if *terminal* can be encoded as UTF-8, else False.

    Very rarely, a "." in some terminal regex will generate a surrogate
    character that cannot be encoded as UTF-8.  This predicate is applied
    as a filter so that such strings never make it through at runtime; the
    failure branch is deliberately excluded from coverage measurement.
    """
    try:
        # str.encode() defaults to UTF-8 with strict error handling.
        terminal.encode()
    except UnicodeEncodeError:  # pragma: no cover
        return False
    return True


class GrammarStrategy(LarkStrategy):
def __init__(self, grammar: Lark, start: str, auto_target: bool):
explicit_strategies = {
PythonIndenter.INDENT_type: st.just(" " * PythonIndenter.tab_len),
PythonIndenter.DEDENT_type: st.just(""),
"NAME": st.text().filter(str.isidentifier),
}
super().__init__(grammar, start, explicit_strategies)
self.terminal_strategies = {
k: v.map(lambda s: s.replace("\0", "")).filter(utf8_encodable)
for k, v in self.terminal_strategies.items() # type: ignore
}
super().__init__(grammar, start, explicit_strategies, alphabet=ALLOWED_CHARS)
self.auto_target = auto_target and start != "single_input"

def do_draw(self, data): # type: ignore
Expand Down

0 comments on commit efd204d

Please sign in to comment.