Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow to customize some behaviors of Lexer, so that Extension instances can know raw block begin and end info. #1963

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ Unreleased
- Use modern packaging metadata with ``pyproject.toml`` instead of ``setup.cfg``.
:pr:`1793`
- Use ``flit_core`` instead of ``setuptools`` as build backend.
- Add the ``ignore_raw_begin_end_tokens`` attribute to ``Lexer`` as an option to
keep the ``raw_begin`` and ``raw_end`` tokens in the token stream. :pr:`1962`
- Add the ``lexer_provider`` parameter to ``Environment`` to allow injecting
customized ``Lexer`` instances for different use cases (e.g. testing). :pr:`1962`



Version 3.1.3
Expand Down
15 changes: 14 additions & 1 deletion src/jinja2/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,11 @@ def load_extensions(
return result


def default_lexer_provider(environment: "Environment") -> Lexer:
    """Return the standard lexer for *environment*.

    This is the provider used when no ``lexer_provider`` is passed to
    :class:`Environment`; it delegates to :func:`get_lexer`, which may
    share cached lexers between environments with identical settings.
    """
    lexer = get_lexer(environment)
    return lexer


def _environment_config_check(environment: "Environment") -> "Environment":
"""Perform a sanity check on the environment."""
assert issubclass(
Expand Down Expand Up @@ -261,6 +266,11 @@ class Environment:
`enable_async`
If set to true this enables async template execution which
allows using async functions and generators.
`lexer_provider`
A callable that takes the environment and returns the Lexer to use.
This allows sharing lexers or looking up existing lexers from a cache.
The default is None, which uses `default_lexer_provider`.
.. versionadded:: 3.2
"""

#: if this environment is sandboxed. Modifying this variable won't make
Expand Down Expand Up @@ -315,6 +325,7 @@ def __init__(
auto_reload: bool = True,
bytecode_cache: t.Optional["BytecodeCache"] = None,
enable_async: bool = False,
lexer_provider: t.Optional[t.Callable[["Environment"], Lexer]] = None,
):
# !!Important notice!!
# The constructor accepts quite a few arguments that should be
Expand Down Expand Up @@ -364,6 +375,8 @@ def __init__(
# load extensions
self.extensions = load_extensions(self, extensions)

self.lexer_provider = lexer_provider or default_lexer_provider

self.is_async = enable_async
_environment_config_check(self)

Expand Down Expand Up @@ -454,7 +467,7 @@ def overlay(
@property
def lexer(self) -> Lexer:
    """The lexer for this environment.

    Built on each access by calling ``self.lexer_provider`` with this
    environment, so a custom provider can cache, share, or otherwise
    customize the returned lexer.
    """
    return self.lexer_provider(self)

def iter_extensions(self) -> t.Iterator["Extension"]:
"""Iterates over the extensions by priority."""
Expand Down
9 changes: 7 additions & 2 deletions src/jinja2/lexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,9 @@ class Lexer:
Multiple environments can share the same lexer.
"""

# Whether to ignore the raw_begin, raw_end tokens
ignore_raw_begin_end_tokens = True

def __init__(self, environment: "Environment") -> None:
# shortcuts
e = re.escape
Expand Down Expand Up @@ -629,9 +632,11 @@ def wrap(
token = TOKEN_BLOCK_BEGIN
elif token == TOKEN_LINESTATEMENT_END:
token = TOKEN_BLOCK_END
# we are not interested in those tokens in the parser
elif token in (TOKEN_RAW_BEGIN, TOKEN_RAW_END):
continue
if self.ignore_raw_begin_end_tokens:
continue
else:
value = value_str
elif token == TOKEN_DATA:
value = self._normalize_newlines(value_str)
elif token == "keyword":
Expand Down
2 changes: 2 additions & 0 deletions src/jinja2/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -1025,6 +1025,8 @@ def flush_data() -> None:
else:
body.append(rv)
self.stream.expect("block_end")
elif token.type in ("raw_begin", "raw_end"):
next(self.stream)
else:
raise AssertionError("internal parsing error")

Expand Down
28 changes: 28 additions & 0 deletions tests/test_lexnparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@

from jinja2 import Environment
from jinja2 import nodes
from jinja2 import select_autoescape
from jinja2 import Template
from jinja2 import TemplateSyntaxError
from jinja2 import UndefinedError
from jinja2.lexer import Lexer
from jinja2.lexer import Token
from jinja2.lexer import TOKEN_BLOCK_BEGIN
from jinja2.lexer import TOKEN_BLOCK_END
Expand Down Expand Up @@ -40,6 +42,12 @@ def test_iter(self, env):
]


def _lexer_provider_to_test_raw_tokens(env):
    """Build a lexer that keeps ``raw_begin``/``raw_end`` in the token stream.

    Used as an ``Environment(lexer_provider=...)`` argument in tests.
    """
    custom_lexer = Lexer(env)
    custom_lexer.ignore_raw_begin_end_tokens = False
    return custom_lexer


class TestLexer:
def test_raw1(self, env):
tmpl = env.from_string(
Expand Down Expand Up @@ -68,6 +76,26 @@ def test_raw4(self, env):
)
assert tmpl.render() == "bar2 spaces\n spacefoo"

def test_raw5(self, env):
    """Raw blocks render identically whether or not the lexer keeps
    the raw_begin/raw_end tokens in its token stream."""
    source = (
        "{{ tag }}{% raw %}<foo>{{ tag }}</foo>{% endraw %}|"
        "{%raw%}{{ bar }}|{% baz %}{% endraw %}"
    )
    want = "&lt;foo/&gt;<foo>{{ tag }}</foo>|{{ bar }}|{% baz %}"

    # Default lexer: raw_begin/raw_end tokens are dropped before parsing.
    default_env = Environment(autoescape=select_autoescape(["html"]))
    rendered = default_env.from_string(source).render(baz="test", tag="<foo/>")
    assert rendered == want

    # Custom provider keeps the raw tokens; rendered output is unchanged.
    custom_env = Environment(
        autoescape=select_autoescape(["html"]),
        lexer_provider=_lexer_provider_to_test_raw_tokens,
    )
    rendered = custom_env.from_string(source).render(baz="test", tag="<foo/>")
    assert rendered == want

def test_balancing(self, env):
env = Environment("{%", "%}", "${", "}")
tmpl = env.from_string(
Expand Down
Loading