Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow to customize some behaviors of Lexer, so that Extension instances can know raw block begin and end info. #1963

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ Unreleased
- Use modern packaging metadata with ``pyproject.toml`` instead of ``setup.cfg``.
:pr:`1793`
- Use ``flit_core`` instead of ``setuptools`` as build backend.
- Add the ``ignore_raw_begin_end_tokens`` attribute to ``Lexer`` as an option to
keep the ``raw_begin`` and ``raw_end`` tokens in the token stream. :pr:`1962`
- Add the ``lexer_provider`` parameter to ``Environment`` to allow injecting
customized ``Lexer`` instances for different use cases (e.g. testing). :pr:`1962`



Version 3.1.3
Expand Down
15 changes: 14 additions & 1 deletion src/jinja2/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,11 @@ def load_extensions(
return result


def default_lexer_provider(environment: "Environment") -> Lexer:
    """Return the standard lexer for *environment*.

    This is the provider used when no ``lexer_provider`` is passed to
    :class:`Environment`; it delegates to :func:`get_lexer`, which may
    share cached lexers between environments with identical settings.
    """
    lexer = get_lexer(environment)
    return lexer


def _environment_config_check(environment: "Environment") -> "Environment":
"""Perform a sanity check on the environment."""
assert issubclass(
Expand Down Expand Up @@ -261,6 +266,11 @@ class Environment:
`enable_async`
If set to true this enables async template execution which
allows using async functions and generators.
`lexer_provider`
A callable that takes the environment and returns the Lexer to use.
This allows sharing lexers or looking up existing lexers from a cache.
The default is None, which uses `default_lexer_provider`.
.. versionadded:: 3.2
"""

#: if this environment is sandboxed. Modifying this variable won't make
Expand Down Expand Up @@ -315,6 +325,7 @@ def __init__(
auto_reload: bool = True,
bytecode_cache: t.Optional["BytecodeCache"] = None,
enable_async: bool = False,
lexer_provider: t.Optional[t.Callable[["Environment"], Lexer]] = None,
):
# !!Important notice!!
# The constructor accepts quite a few arguments that should be
Expand Down Expand Up @@ -364,6 +375,8 @@ def __init__(
# load extensions
self.extensions = load_extensions(self, extensions)

self.lexer_provider = lexer_provider or default_lexer_provider

self.is_async = enable_async
_environment_config_check(self)

Expand Down Expand Up @@ -454,7 +467,7 @@ def overlay(
@property
def lexer(self) -> Lexer:
    """The lexer for this environment.

    Built on each access by calling ``self.lexer_provider`` with this
    environment, so a custom provider can cache, share, or otherwise
    customize the returned lexer.
    """
    return self.lexer_provider(self)

def iter_extensions(self) -> t.Iterator["Extension"]:
"""Iterates over the extensions by priority."""
Expand Down
9 changes: 7 additions & 2 deletions src/jinja2/lexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,9 @@ class Lexer:
Multiple environments can share the same lexer.
"""

# Whether to ignore the raw_begin, raw_end tokens
ignore_raw_begin_end_tokens = True

def __init__(self, environment: "Environment") -> None:
# shortcuts
e = re.escape
Expand Down Expand Up @@ -629,9 +632,11 @@ def wrap(
token = TOKEN_BLOCK_BEGIN
elif token == TOKEN_LINESTATEMENT_END:
token = TOKEN_BLOCK_END
# we are not interested in those tokens in the parser
elif token in (TOKEN_RAW_BEGIN, TOKEN_RAW_END):
continue
if self.ignore_raw_begin_end_tokens:
continue
else:
value = value_str
elif token == TOKEN_DATA:
value = self._normalize_newlines(value_str)
elif token == "keyword":
Expand Down
2 changes: 2 additions & 0 deletions src/jinja2/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -1025,6 +1025,8 @@ def flush_data() -> None:
else:
body.append(rv)
self.stream.expect("block_end")
elif token.type in ("raw_begin", "raw_end"):
next(self.stream)
else:
raise AssertionError("internal parsing error")

Expand Down
28 changes: 28 additions & 0 deletions tests/test_lexnparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@

from jinja2 import Environment
from jinja2 import nodes
from jinja2 import select_autoescape
from jinja2 import Template
from jinja2 import TemplateSyntaxError
from jinja2 import UndefinedError
from jinja2.lexer import Lexer
from jinja2.lexer import Token
from jinja2.lexer import TOKEN_BLOCK_BEGIN
from jinja2.lexer import TOKEN_BLOCK_END
Expand Down Expand Up @@ -40,6 +42,12 @@ def test_iter(self, env):
]


def _lexer_provider_to_test_raw_tokens(env):
    """Build a lexer that keeps ``raw_begin``/``raw_end`` in the token stream.

    Used as an ``Environment(lexer_provider=...)`` argument in tests.
    """
    custom_lexer = Lexer(env)
    custom_lexer.ignore_raw_begin_end_tokens = False
    return custom_lexer


class TestLexer:
def test_raw1(self, env):
tmpl = env.from_string(
Expand Down Expand Up @@ -68,6 +76,26 @@ def test_raw4(self, env):
)
assert tmpl.render() == "bar2 spaces\n spacefoo"

def test_raw5(self, env):
    """Raw blocks render identically whether or not the lexer keeps
    the raw_begin/raw_end tokens in its token stream."""
    source = (
        "{{ tag }}{% raw %}<foo>{{ tag }}</foo>{% endraw %}|"
        "{%raw%}{{ bar }}|{% baz %}{% endraw %}"
    )
    want = "&lt;foo/&gt;<foo>{{ tag }}</foo>|{{ bar }}|{% baz %}"

    # Default lexer: raw_begin/raw_end tokens are dropped before parsing.
    default_env = Environment(autoescape=select_autoescape(["html"]))
    rendered = default_env.from_string(source).render(baz="test", tag="<foo/>")
    assert rendered == want

    # Custom provider keeps the raw tokens; rendered output is unchanged.
    custom_env = Environment(
        autoescape=select_autoescape(["html"]),
        lexer_provider=_lexer_provider_to_test_raw_tokens,
    )
    rendered = custom_env.from_string(source).render(baz="test", tag="<foo/>")
    assert rendered == want

def test_balancing(self, env):
env = Environment("{%", "%}", "${", "}")
tmpl = env.from_string(
Expand Down
Loading