From a3db7ab8d29ca2c288124733f9925990964abfcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Mazzucotelli?= Date: Thu, 10 Oct 2024 17:45:46 +0200 Subject: [PATCH 1/4] Add option to preserve comments when parsing templates --- src/jinja2/environment.py | 16 ++++++++++++---- src/jinja2/lexer.py | 25 +++++++++++++++++++++---- src/jinja2/nodes.py | 7 +++++++ src/jinja2/parser.py | 10 +++++++++- tests/test_lexnparse.py | 22 ++++++++++++++++++++++ 5 files changed, 71 insertions(+), 9 deletions(-) diff --git a/src/jinja2/environment.py b/src/jinja2/environment.py index 0b303d597..672874269 100644 --- a/src/jinja2/environment.py +++ b/src/jinja2/environment.py @@ -600,6 +600,7 @@ def parse( source: str, name: t.Optional[str] = None, filename: t.Optional[str] = None, + preserve_comments: bool = False, ) -> nodes.Template: """Parse the sourcecode and return the abstract syntax tree. This tree of nodes is used by the compiler to convert the template into @@ -610,15 +611,21 @@ def parse( this gives you a good overview of the node tree generated. """ try: - return self._parse(source, name, filename) + return self._parse(source, name, filename, preserve_comments) except TemplateSyntaxError: self.handle_exception(source=source) def _parse( - self, source: str, name: t.Optional[str], filename: t.Optional[str] + self, + source: str, + name: t.Optional[str], + filename: t.Optional[str], + preserve_comments: bool = False, ) -> nodes.Template: """Internal parsing function used by `parse` and `compile`.""" - return Parser(self, source, name, filename).parse() + return Parser( + self, source, name, filename, preserve_comments=preserve_comments + ).parse() def lex( self, @@ -663,12 +670,13 @@ def _tokenize( name: t.Optional[str], filename: t.Optional[str] = None, state: t.Optional[str] = None, + preserve_comments: bool = False, ) -> TokenStream: """Called by the parser to do the preprocessing and filtering for all the extensions. Returns a :class:`~jinja2.lexer.TokenStream`. 
""" source = self.preprocess(source, name, filename) - stream = self.lexer.tokenize(source, name, filename, state) + stream = self.lexer.tokenize(source, name, filename, state, preserve_comments) for ext in self.iter_extensions(): stream = ext.filter_stream(stream) # type: ignore diff --git a/src/jinja2/lexer.py b/src/jinja2/lexer.py index 6dc94b67d..1b30922a7 100644 --- a/src/jinja2/lexer.py +++ b/src/jinja2/lexer.py @@ -146,17 +146,22 @@ f"({'|'.join(re.escape(x) for x in sorted(operators, key=lambda x: -len(x)))})" ) -ignored_tokens = frozenset( +comment_tokens = frozenset( [ TOKEN_COMMENT_BEGIN, TOKEN_COMMENT, TOKEN_COMMENT_END, - TOKEN_WHITESPACE, TOKEN_LINECOMMENT_BEGIN, TOKEN_LINECOMMENT_END, TOKEN_LINECOMMENT, ] ) +ignored_tokens = frozenset( + [ + TOKEN_WHITESPACE, + *comment_tokens, + ] +) ignore_if_empty = frozenset( [TOKEN_WHITESPACE, TOKEN_DATA, TOKEN_COMMENT, TOKEN_LINECOMMENT] ) @@ -607,22 +612,30 @@ def tokenize( name: t.Optional[str] = None, filename: t.Optional[str] = None, state: t.Optional[str] = None, + preserve_comments: bool = False, ) -> TokenStream: """Calls tokeniter + tokenize and wraps it in a token stream.""" stream = self.tokeniter(source, name, filename, state) - return TokenStream(self.wrap(stream, name, filename), name, filename) + return TokenStream( + self.wrap(stream, name, filename, preserve_comments), name, filename + ) def wrap( self, stream: t.Iterable[t.Tuple[int, str, str]], name: t.Optional[str] = None, filename: t.Optional[str] = None, + preserve_comments: bool = False, ) -> t.Iterator[Token]: """This is called with the stream as returned by `tokenize` and wraps every token in a :class:`Token` and converts the value. 
""" + ignored = ignored_tokens + if preserve_comments: + ignored -= comment_tokens + for lineno, token, value_str in stream: - if token in ignored_tokens: + if token in ignored: continue value: t.Any = value_str @@ -631,6 +644,10 @@ def wrap( token = TOKEN_BLOCK_BEGIN elif token == TOKEN_LINESTATEMENT_END: token = TOKEN_BLOCK_END + elif token == TOKEN_LINECOMMENT_BEGIN: + token = TOKEN_COMMENT_BEGIN + elif token == TOKEN_LINECOMMENT_END: + token = TOKEN_COMMENT_END # we are not interested in those tokens in the parser elif token in (TOKEN_RAW_BEGIN, TOKEN_RAW_END): continue diff --git a/src/jinja2/nodes.py b/src/jinja2/nodes.py index 2f93b90ec..9c81008ba 100644 --- a/src/jinja2/nodes.py +++ b/src/jinja2/nodes.py @@ -715,6 +715,13 @@ def as_const(self, eval_ctx: t.Optional[EvalContext] = None) -> t.Any: return self.expr2.as_const(eval_ctx) +class Comment(Stmt): + """A template comment.""" + + fields = ("data",) + data: str + + def args_as_const( node: t.Union["_FilterTestCommon", "Call"], eval_ctx: t.Optional[EvalContext] ) -> t.Tuple[t.List[t.Any], t.Dict[t.Any, t.Any]]: diff --git a/src/jinja2/parser.py b/src/jinja2/parser.py index 817abeccf..a8ed5941c 100644 --- a/src/jinja2/parser.py +++ b/src/jinja2/parser.py @@ -57,9 +57,12 @@ def __init__( name: t.Optional[str] = None, filename: t.Optional[str] = None, state: t.Optional[str] = None, + preserve_comments: bool = False, ) -> None: self.environment = environment - self.stream = environment._tokenize(source, name, filename, state) + self.stream = environment._tokenize( + source, name, filename, state, preserve_comments + ) self.name = name self.filename = filename self.closed = False @@ -1025,6 +1028,11 @@ def flush_data() -> None: else: body.append(rv) self.stream.expect("block_end") + elif token.type == "comment_begin": + flush_data() + next(self.stream) + body.append(nodes.Comment(next(self.stream).value)) + self.stream.expect("comment_end") else: raise AssertionError("internal parsing error") diff --git 
a/tests/test_lexnparse.py b/tests/test_lexnparse.py index c02adad5a..ca0708a75 100644 --- a/tests/test_lexnparse.py +++ b/tests/test_lexnparse.py @@ -314,6 +314,28 @@ def assert_error(code, expected): ) assert_error("{% unknown_tag %}", "Encountered unknown tag 'unknown_tag'.") + def test_comment_preservation(self, env): + ast = env.parse("{# foo #}{{ bar }}", preserve_comments=True) + assert len(ast.body) == 2 + assert isinstance(ast.body[0], nodes.Comment) + assert ast.body[0].data == " foo " + + ast = env.parse("{# foo #}{{ bar }}", preserve_comments=False) + assert len(ast.body) == 1 + assert not isinstance(ast.body[0], nodes.Comment) + + def test_line_comment_preservation(self, env): + env = Environment(line_comment_prefix="#") + + ast = env.parse("# foo\n{{ bar }}", preserve_comments=True) + assert len(ast.body) == 2 + assert isinstance(ast.body[0], nodes.Comment) + assert ast.body[0].data == " foo" + + ast = env.parse("# foo\n{{ bar }}", preserve_comments=False) + assert len(ast.body) == 1 + assert not isinstance(ast.body[0], nodes.Comment) + class TestSyntax: def test_call(self, env): From 35681ece13f7f372dadfc814ee7c0413ea92d337 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Mazzucotelli?= Date: Thu, 10 Oct 2024 18:19:12 +0200 Subject: [PATCH 2/4] fixup! Add option to preserve comments when parsing templates --- CHANGES.rst | 1 + src/jinja2/environment.py | 3 +++ src/jinja2/lexer.py | 9 ++++++++- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index f23b6c96f..a10fa652d 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -9,6 +9,7 @@ Unreleased - Use modern packaging metadata with ``pyproject.toml`` instead of ``setup.cfg``. :pr:`1793` - Use ``flit_core`` instead of ``setuptools`` as build backend. +- Add the ``preserve_comments`` parameter to ``Environment.parse`` to preserve comments in template ASTs. 
:pr:`2037` Version 3.1.5 diff --git a/src/jinja2/environment.py b/src/jinja2/environment.py index 672874269..7705913f2 100644 --- a/src/jinja2/environment.py +++ b/src/jinja2/environment.py @@ -609,6 +609,9 @@ def parse( If you are :ref:`developing Jinja extensions <writing-extensions>` this gives you a good overview of the node tree generated. + + .. versionchanged:: 3.2 + Added `preserve_comments` parameter. """ try: return self._parse(source, name, filename, preserve_comments) diff --git a/src/jinja2/lexer.py b/src/jinja2/lexer.py index 1b30922a7..6b2980061 100644 --- a/src/jinja2/lexer.py +++ b/src/jinja2/lexer.py @@ -614,7 +614,11 @@ def tokenize( state: t.Optional[str] = None, preserve_comments: bool = False, ) -> TokenStream: - """Calls tokeniter + tokenize and wraps it in a token stream.""" + """Calls tokeniter + tokenize and wraps it in a token stream. + + .. versionchanged:: 3.2 + Added `preserve_comments` parameter. + """ stream = self.tokeniter(source, name, filename, state) return TokenStream( self.wrap(stream, name, filename, preserve_comments), name, filename @@ -629,6 +633,9 @@ def wrap( ) -> t.Iterator[Token]: """This is called with the stream as returned by `tokenize` and wraps every token in a :class:`Token` and converts the value. + + .. versionchanged:: 3.2 + Added `preserve_comments` parameter. """ ignored = ignored_tokens if preserve_comments: From 7b718193cb3292b27ae41a73a3d61c1d8e29a188 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Mazzucotelli?= Date: Thu, 10 Oct 2024 20:58:31 +0200 Subject: [PATCH 3/4] fixup! 
Add option to preserve comments when parsing templates --- CHANGES.rst | 2 +- src/jinja2/environment.py | 14 +++----------- src/jinja2/lexer.py | 38 ++++---------------------------------- src/jinja2/parser.py | 16 ++++++++-------- tests/test_lexnparse.py | 13 ++----------- 5 files changed, 18 insertions(+), 65 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index a10fa652d..c8e357133 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -9,7 +9,7 @@ Unreleased - Use modern packaging metadata with ``pyproject.toml`` instead of ``setup.cfg``. :pr:`1793` - Use ``flit_core`` instead of ``setuptools`` as build backend. -- Add the ``preserve_comments`` parameter to ``Environment.parse`` to preserve comments in template ASTs. :pr:`2037` +- Preserve comments in ASTs when parsing templates with ``Environment.parse``. :pr:`2037` Version 3.1.5 diff --git a/src/jinja2/environment.py b/src/jinja2/environment.py index 7705913f2..531669eff 100644 --- a/src/jinja2/environment.py +++ b/src/jinja2/environment.py @@ -600,7 +600,6 @@ def parse( source: str, name: t.Optional[str] = None, filename: t.Optional[str] = None, - preserve_comments: bool = False, ) -> nodes.Template: """Parse the sourcecode and return the abstract syntax tree. This tree of nodes is used by the compiler to convert the template into @@ -609,12 +608,9 @@ def parse( If you are :ref:`developing Jinja extensions <writing-extensions>` this gives you a good overview of the node tree generated. - - .. versionchanged:: 3.2 - Added `preserve_comments` parameter. 
""" try: - return self._parse(source, name, filename, preserve_comments) + return self._parse(source, name, filename) except TemplateSyntaxError: self.handle_exception(source=source) @@ -623,12 +619,9 @@ def _parse( source: str, name: t.Optional[str], filename: t.Optional[str], - preserve_comments: bool = False, ) -> nodes.Template: """Internal parsing function used by `parse` and `compile`.""" - return Parser( - self, source, name, filename, preserve_comments=preserve_comments - ).parse() + return Parser(self, source, name, filename).parse() def lex( self, @@ -673,13 +666,12 @@ def _tokenize( name: t.Optional[str], filename: t.Optional[str] = None, state: t.Optional[str] = None, - preserve_comments: bool = False, ) -> TokenStream: """Called by the parser to do the preprocessing and filtering for all the extensions. Returns a :class:`~jinja2.lexer.TokenStream`. """ source = self.preprocess(source, name, filename) - stream = self.lexer.tokenize(source, name, filename, state, preserve_comments) + stream = self.lexer.tokenize(source, name, filename, state) for ext in self.iter_extensions(): stream = ext.filter_stream(stream) # type: ignore diff --git a/src/jinja2/lexer.py b/src/jinja2/lexer.py index 6b2980061..88f2fc3d6 100644 --- a/src/jinja2/lexer.py +++ b/src/jinja2/lexer.py @@ -146,22 +146,7 @@ f"({'|'.join(re.escape(x) for x in sorted(operators, key=lambda x: -len(x)))})" ) -comment_tokens = frozenset( - [ - TOKEN_COMMENT_BEGIN, - TOKEN_COMMENT, - TOKEN_COMMENT_END, - TOKEN_LINECOMMENT_BEGIN, - TOKEN_LINECOMMENT_END, - TOKEN_LINECOMMENT, - ] -) -ignored_tokens = frozenset( - [ - TOKEN_WHITESPACE, - *comment_tokens, - ] -) +ignored_tokens = frozenset([TOKEN_WHITESPACE]) ignore_if_empty = frozenset( [TOKEN_WHITESPACE, TOKEN_DATA, TOKEN_COMMENT, TOKEN_LINECOMMENT] ) @@ -612,37 +597,22 @@ def tokenize( name: t.Optional[str] = None, filename: t.Optional[str] = None, state: t.Optional[str] = None, - preserve_comments: bool = False, ) -> TokenStream: - """Calls 
tokeniter + tokenize and wraps it in a token stream. - - .. versionchanged:: 3.2 - Added `preserve_comments` parameter. - """ + """Calls tokeniter + tokenize and wraps it in a token stream.""" stream = self.tokeniter(source, name, filename, state) - return TokenStream( - self.wrap(stream, name, filename, preserve_comments), name, filename - ) + return TokenStream(self.wrap(stream, name, filename), name, filename) def wrap( self, stream: t.Iterable[t.Tuple[int, str, str]], name: t.Optional[str] = None, filename: t.Optional[str] = None, - preserve_comments: bool = False, ) -> t.Iterator[Token]: """This is called with the stream as returned by `tokenize` and wraps every token in a :class:`Token` and converts the value. - - .. versionchanged:: 3.2 - Added `preserve_comments` parameter. """ - ignored = ignored_tokens - if preserve_comments: - ignored -= comment_tokens - for lineno, token, value_str in stream: - if token in ignored: + if token in ignored_tokens: continue value: t.Any = value_str diff --git a/src/jinja2/parser.py b/src/jinja2/parser.py index a8ed5941c..9c3a94df8 100644 --- a/src/jinja2/parser.py +++ b/src/jinja2/parser.py @@ -57,12 +57,9 @@ def __init__( name: t.Optional[str] = None, filename: t.Optional[str] = None, state: t.Optional[str] = None, - preserve_comments: bool = False, ) -> None: self.environment = environment - self.stream = environment._tokenize( - source, name, filename, state, preserve_comments - ) + self.stream = environment._tokenize(source, name, filename, state) self.name = name self.filename = filename self.closed = False @@ -318,10 +315,13 @@ def parse_block(self) -> nodes.Block: # with whitespace data if node.required: for body_node in node.body: - if not isinstance(body_node, nodes.Output) or any( - not isinstance(output_node, nodes.TemplateData) - or not output_node.data.isspace() - for output_node in body_node.nodes + if not isinstance(body_node, (nodes.Output, nodes.Comment)) or ( + isinstance(body_node, nodes.Output) + and 
any( + not isinstance(output_node, nodes.TemplateData) + or not output_node.data.isspace() + for output_node in body_node.nodes + ) ): self.fail("Required blocks can only contain comments or whitespace") diff --git a/tests/test_lexnparse.py b/tests/test_lexnparse.py index ca0708a75..cac32cf71 100644 --- a/tests/test_lexnparse.py +++ b/tests/test_lexnparse.py @@ -315,27 +315,18 @@ def assert_error(code, expected): assert_error("{% unknown_tag %}", "Encountered unknown tag 'unknown_tag'.") def test_comment_preservation(self, env): - ast = env.parse("{# foo #}{{ bar }}", preserve_comments=True) + ast = env.parse("{# foo #}{{ bar }}") assert len(ast.body) == 2 assert isinstance(ast.body[0], nodes.Comment) assert ast.body[0].data == " foo " - ast = env.parse("{# foo #}{{ bar }}", preserve_comments=False) - assert len(ast.body) == 1 - assert not isinstance(ast.body[0], nodes.Comment) - def test_line_comment_preservation(self, env): env = Environment(line_comment_prefix="#") - - ast = env.parse("# foo\n{{ bar }}", preserve_comments=True) + ast = env.parse("# foo\n{{ bar }}") assert len(ast.body) == 2 assert isinstance(ast.body[0], nodes.Comment) assert ast.body[0].data == " foo" - ast = env.parse("# foo\n{{ bar }}", preserve_comments=False) - assert len(ast.body) == 1 - assert not isinstance(ast.body[0], nodes.Comment) - class TestSyntax: def test_call(self, env): From 7622638b961417b7121b5583a4e7f8266457f7bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Mazzucotelli?= Date: Thu, 10 Oct 2024 21:00:56 +0200 Subject: [PATCH 4/4] fixup! 
Add option to preserve comments when parsing templates --- src/jinja2/environment.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/jinja2/environment.py b/src/jinja2/environment.py index 531669eff..0b303d597 100644 --- a/src/jinja2/environment.py +++ b/src/jinja2/environment.py @@ -615,10 +615,7 @@ def parse( self.handle_exception(source=source) def _parse( - self, - source: str, - name: t.Optional[str], - filename: t.Optional[str], + self, source: str, name: t.Optional[str], filename: t.Optional[str] ) -> nodes.Template: """Internal parsing function used by `parse` and `compile`.""" return Parser(self, source, name, filename).parse()