From f5ed3f4a05891c5490ea7ff454740206ac1ab0e9 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 13 Jul 2024 10:09:26 -0700 Subject: [PATCH] almost there --- prqlc/prqlc-parser/src/parser/common.rs | 51 +++-- prqlc/prqlc-parser/src/parser/expr.rs | 179 +++++++++++++----- prqlc/prqlc-parser/src/parser/mod.rs | 1 + prqlc/prqlc-parser/src/parser/stmt.rs | 107 ++++++++++- prqlc/prqlc-parser/src/parser/test.rs | 10 +- ...qlc_parser__test__pipeline_parse_tree.snap | 2 +- prqlc/prqlc-parser/src/test.rs | 125 +++++++----- 7 files changed, 350 insertions(+), 125 deletions(-) diff --git a/prqlc/prqlc-parser/src/parser/common.rs b/prqlc/prqlc-parser/src/parser/common.rs index a0bf5d04e54b..f23f08ea25c8 100644 --- a/prqlc/prqlc-parser/src/parser/common.rs +++ b/prqlc/prqlc-parser/src/parser/common.rs @@ -27,7 +27,10 @@ pub fn keyword(kw: &'static str) -> impl Parser + } pub fn new_line() -> impl Parser + Clone { - just(TokenKind::NewLine).ignored() + just(TokenKind::NewLine) + .or(just(TokenKind::Start)) + .ignored() + .labelled("new line") } pub fn ctrl(char: char) -> impl Parser + Clone { @@ -44,23 +47,20 @@ pub fn into_stmt((annotations, kind): (Vec, StmtKind), span: Span) - } pub fn doc_comment() -> impl Parser + Clone { - // doc comments must start on a new line, so we enforce a new line before - // the doc comment (but how to handle the start of a file?) + // doc comments must start on a new line, so we enforce a new line (which + // can also be a file start) before the doc comment // // TODO: we currently lose any empty newlines between doc comments; // eventually we want to retain them - new_line() - .repeated() - .at_least(1) - .ignore_then(select! { - TokenKind::DocComment(dc) => dc, - }) - .repeated() - .at_least(1) - .collect() - .debug("doc_comment") - .map(|lines: Vec| lines.join("\n")) - .labelled("doc comment") + (new_line().repeated().at_least(1).ignore_then(select! { + TokenKind::DocComment(dc) => dc, + })) + .repeated() + .at_least(1) + .collect() + .debug("doc_comment") + .map(|lines: Vec| lines.join("\n")) + .labelled("doc comment") } pub fn with_doc_comment<'a, P, O>( @@ -95,4 +95,25 @@ mod tests { ) "###); } + + #[test] + fn test_doc_comment_or_not() { + assert_debug_snapshot!(parse_with_parser(r#"hello"#, doc_comment().or_not()).unwrap(), @"None"); + assert_debug_snapshot!(parse_with_parser(r#"hello"#, doc_comment().or_not().then_ignore(new_line().repeated()).then(ident_part())).unwrap(), @r###" + ( + None, + "hello", + ) + "###); + } + + #[test] + fn test_no_doc_comment_in_with_doc_comment() { + impl SupportsDocComment for String { + fn with_doc_comment(self, _doc_comment: Option) -> Self { + self + } + } + assert_debug_snapshot!(parse_with_parser(r#"hello"#, with_doc_comment(new_line().ignore_then(ident_part()))).unwrap(), @r###""hello""###); + } } diff --git a/prqlc/prqlc-parser/src/parser/expr.rs b/prqlc/prqlc-parser/src/parser/expr.rs index 667f34361dd1..8447565c9e13 100644 --- a/prqlc/prqlc-parser/src/parser/expr.rs +++ b/prqlc/prqlc-parser/src/parser/expr.rs @@ -76,12 +76,16 @@ pub fn expr<'a>() -> impl Parser + Clone + 'a { fn tuple( nested_expr: impl Parser + Clone, ) -> impl Parser + Clone { - ident_part() - .then_ignore(ctrl('=')) - .or_not() - .then(nested_expr) - .map(|(alias, expr)| Expr { alias, ..expr }) - .padded_by(new_line().repeated()) + new_line() + .repeated() + .ignore_then( + ident_part() + .then_ignore(ctrl('=')) + .or_not() + .then(nested_expr) + .map(|(alias, expr)| Expr { alias, ..expr }), + ) + // .padded_by(new_line().repeated()) .separated_by(ctrl(',')) .allow_trailing() .then_ignore(new_line().repeated()) @@ -103,22 +107,27 @@ fn tuple( fn array( expr: impl Parser + Clone, ) -> impl Parser + Clone { - expr.padded_by(new_line().repeated()) - .separated_by(ctrl(',')) - .allow_trailing() - .then_ignore(new_line().repeated()) - .delimited_by(ctrl('['), ctrl(']')) - .recover_with(nested_delimiters( - TokenKind::Control('['), - TokenKind::Control(']'), - [ - (TokenKind::Control('{'), TokenKind::Control('}')), - (TokenKind::Control('('), TokenKind::Control(')')), - (TokenKind::Control('['), TokenKind::Control(']')), - ], - |_| vec![], - )) - .map(ExprKind::Array) + new_line() + .repeated() + .ignore_then( + expr + // .padded_by(new_line().repeated()) + .separated_by(ctrl(',')) + .allow_trailing() + .then_ignore(new_line().repeated()) + .delimited_by(ctrl('['), ctrl(']')) + .recover_with(nested_delimiters( + TokenKind::Control('['), + TokenKind::Control(']'), + [ + (TokenKind::Control('{'), TokenKind::Control('}')), + (TokenKind::Control('('), TokenKind::Control(')')), + (TokenKind::Control('['), TokenKind::Control(']')), + ], + |_| vec![], + )) + .map(ExprKind::Array), + ) .labelled("array") } @@ -162,12 +171,16 @@ fn case( ) -> impl Parser + Clone { keyword("case") .ignore_then( - func_call(expr.clone()) - .map(Box::new) - .then_ignore(just(TokenKind::ArrowFat)) - .then(func_call(expr).map(Box::new)) - .map(|(condition, value)| SwitchCase { condition, value }) - .padded_by(new_line().repeated()) + new_line() + .repeated() + .ignore_then( + func_call(expr.clone()) + .map(Box::new) + .then_ignore(just(TokenKind::ArrowFat)) + .then(func_call(expr).map(Box::new)) + .map(|(condition, value)| SwitchCase { condition, value }), + ) + // .padded_by(new_line().repeated()) .separated_by(ctrl(',')) .allow_trailing() .then_ignore(new_line().repeated()) @@ -276,29 +289,29 @@ where new_line().repeated().at_least(1).ignored(), )); - new_line() - .repeated() - .ignore_then( + with_doc_comment( + new_line().repeated().ignore_then( ident_part() .then_ignore(ctrl('=')) .or_not() .then(expr) - .map(|(alias, expr)| Expr { alias, ..expr }) - .separated_by(pipe) - .at_least(1) - .map_with_span(|exprs, span| { - // If there's only one expr, then we don't need to wrap it - // in a pipeline — just return the lone expr. Otherwise, - // wrap them in a pipeline. - exprs.into_iter().exactly_one().unwrap_or_else(|exprs| { - ExprKind::Pipeline(Pipeline { - exprs: exprs.collect(), - }) - .into_expr(span) - }) - }), - ) - .labelled("pipeline") + .map(|(alias, expr)| Expr { alias, ..expr }), + ), + ) + .separated_by(pipe) + .at_least(1) + .map_with_span(|exprs, span| { + // If there's only one expr, then we don't need to wrap it + // in a pipeline — just return the lone expr. Otherwise, + // wrap them in a pipeline. + exprs.into_iter().exactly_one().unwrap_or_else(|exprs| { + ExprKind::Pipeline(Pipeline { + exprs: exprs.collect(), + }) + .into_expr(span) + }) + }) + .labelled("pipeline") } fn binary_op_parser<'a, Term, Op>( @@ -545,3 +558,75 @@ fn operator_or() -> impl Parser + Clone { fn operator_coalesce() -> impl Parser + Clone { just(TokenKind::Coalesce).to(BinOp::Coalesce) } + +#[cfg(test)] +mod tests { + + use insta::assert_yaml_snapshot; + + use crate::test::{parse_with_parser, trim_start}; + + use super::*; + + #[test] + fn test_expr() { + assert_yaml_snapshot!( + parse_with_parser(r#"5+5"#, trim_start().ignore_then(expr())).unwrap(), + @r###" + --- + Binary: + left: + Literal: + Integer: 5 + span: "0:0-1" + op: Add + right: + Literal: + Integer: 5 + span: "0:2-3" + span: "0:0-3" + "###); + + assert_yaml_snapshot!( + parse_with_parser(r#"derive x = 5"#, trim_start().ignore_then(expr())).unwrap(), + @r###" + --- + Ident: derive + span: "0:0-6" + "###); + } + + #[test] + fn test_pipeline() { + assert_yaml_snapshot!( + parse_with_parser(r#" + from artists + derive x = 5 + "#, trim_start().then(pipeline(expr_call()))).unwrap(), + @r###" + --- + - ~ + - Pipeline: + exprs: + - FuncCall: + name: + Ident: from + span: "0:13-17" + args: + - Ident: artists + span: "0:18-25" + span: "0:13-25" + - FuncCall: + name: + Ident: derive + span: "0:38-44" + args: + - Literal: + Integer: 5 + span: "0:49-50" + alias: x + span: "0:38-50" + span: "0:13-50" + "###); + } +} diff --git a/prqlc/prqlc-parser/src/parser/mod.rs b/prqlc/prqlc-parser/src/parser/mod.rs index 669f2f2f2c3e..b4c5dc3b18b8 100644 --- a/prqlc/prqlc-parser/src/parser/mod.rs +++ b/prqlc/prqlc-parser/src/parser/mod.rs @@ -1,5 +1,6 @@ use chumsky::{prelude::*, Stream}; +pub use self::common::new_line; use crate::error::Error; use crate::lexer::lr; use crate::span::Span; diff --git a/prqlc/prqlc-parser/src/parser/stmt.rs b/prqlc/prqlc-parser/src/parser/stmt.rs index d79e7ee8c3a0..890e5bc904aa 100644 --- a/prqlc/prqlc-parser/src/parser/stmt.rs +++ b/prqlc/prqlc-parser/src/parser/stmt.rs @@ -15,6 +15,9 @@ pub fn source() -> impl Parser, Error = PError> { with_doc_comment(query_def()) .or_not() .chain(module_contents()) + // This is the only instance we can consume newlines at the end of something, since + // this is the end of the module + .then_ignore(new_line().repeated()) .then_ignore(end()) } @@ -26,12 +29,18 @@ fn module_contents() -> impl Parser, Error = PError> { .map(|(name, stmts)| StmtKind::ModuleDef(ModuleDef { name, stmts })) .labelled("module definition"); - let annotation = just(TokenKind::Annotate) - .ignore_then(expr()) - .then_ignore(new_line().repeated()) - .map(|expr| Annotation { - expr: Box::new(expr), - }) + let annotation = new_line() + .repeated() + // TODO: we could enforce annotations starting on a new line? + // .at_least(1) + .ignore_then( + just(TokenKind::Annotate) + .ignore_then(expr()) + // .then_ignore(new_line().repeated()) + .map(|expr| Annotation { + expr: Box::new(expr), + }), + ) .labelled("annotation"); // Also need to handle new_line vs. start of file here @@ -60,6 +69,7 @@ fn module_contents() -> impl Parser, Error = PError> { fn query_def() -> impl Parser + Clone { new_line() .repeated() + .at_least(1) .ignore_then(keyword("prql")) .ignore_then( // named arg @@ -144,9 +154,13 @@ fn var_def() -> impl Parser + Clone { .labelled("variable definition"); let main_or_into = pipeline(expr_call()) - .then_ignore(new_line().repeated()) .map(Box::new) - .then(keyword("into").ignore_then(ident_part()).or_not()) + .then( + new_line() + .repeated() + .ignore_then(keyword("into").ignore_then(ident_part())) + .or_not(), + ) .map(|(value, name)| { let kind = if name.is_none() { VarDefKind::Main @@ -162,6 +176,8 @@ fn var_def() -> impl Parser + Clone { ty: None, }) }) + // TODO: this isn't really accurate, since a standard `from artists` + // also counts as this; we should change .labelled("variable definition"); let_.or(main_or_into) @@ -190,6 +206,77 @@ mod tests { use super::*; use crate::test::parse_with_parser; + #[test] + fn test_module_def() { + assert_yaml_snapshot!(parse_with_parser(r#"module hello { + + let world = 1 + + let man = module.world + + } + "#, module_contents()).unwrap(), @r###" + --- + - ModuleDef: + name: hello + stmts: + - VarDef: + kind: Let + name: world + value: + Literal: + Integer: 1 + span: "0:50-51" + span: "0:38-51" + - VarDef: + kind: Let + name: man + value: + Indirection: + base: + Ident: module + span: "0:74-80" + field: + Name: world + span: "0:74-86" + span: "0:64-86" + span: "0:11-98" + "###); + + assert_yaml_snapshot!(parse_with_parser(r#" + module hello { + let world = 1 + let man = module.world + } + "#, module_contents()).unwrap(), @r###" + --- + - ModuleDef: + name: hello + stmts: + - VarDef: + kind: Let + name: world + value: + Literal: + Integer: 1 + span: "0:50-51" + span: "0:38-51" + - VarDef: + kind: Let + name: man + value: + Indirection: + base: + Ident: module + span: "0:74-80" + field: + Name: world + span: "0:74-86" + span: "0:64-86" + span: "0:11-98" + "###); + } + #[test] fn test_doc_comment_module() { assert_yaml_snapshot!(parse_with_parser(r#" @@ -211,7 +298,7 @@ mod tests { - Ident: foo span: "0:44-47" span: "0:39-47" - span: "0:30-49" + span: "0:30-47" doc_comment: " first doc comment" "###); @@ -253,7 +340,7 @@ mod tests { - Ident: bar span: "0:108-111" span: "0:103-111" - span: "0:94-113" + span: "0:94-111" doc_comment: " second doc comment" "###); } diff --git a/prqlc/prqlc-parser/src/parser/test.rs b/prqlc/prqlc-parser/src/parser/test.rs index 0d6ff9963f0f..e8cff7a53957 100644 --- a/prqlc/prqlc-parser/src/parser/test.rs +++ b/prqlc/prqlc-parser/src/parser/test.rs @@ -1,3 +1,4 @@ +use chumsky::Parser; use insta::assert_yaml_snapshot; use crate::parser::prepare_stream; @@ -6,10 +7,13 @@ use crate::test::parse_with_parser; use crate::{error::Error, lexer::lex_source}; use crate::{lexer::lr::TokenKind, parser::pr::FuncCall}; -use super::pr::Expr; +use super::{common::new_line, pr::Expr}; fn parse_expr(source: &str) -> Result> { - parse_with_parser(source, super::expr::expr_call()) + parse_with_parser( + source, + new_line().repeated().ignore_then(super::expr::expr_call()), + ) } #[test] @@ -22,6 +26,8 @@ fn test_prepare_stream() { let mut stream = prepare_stream(tokens.0.into_iter(), input, 0); assert_yaml_snapshot!(stream.fetch_tokens().collect::>(), @r###" --- + - - Start + - "0:0-0" - - Ident: from - "0:0-4" - - Ident: artists diff --git a/prqlc/prqlc-parser/src/snapshots/prqlc_parser__test__pipeline_parse_tree.snap b/prqlc/prqlc-parser/src/snapshots/prqlc_parser__test__pipeline_parse_tree.snap index 84bca84aeba0..38604e50876c 100644 --- a/prqlc/prqlc-parser/src/snapshots/prqlc_parser__test__pipeline_parse_tree.snap +++ b/prqlc/prqlc-parser/src/snapshots/prqlc_parser__test__pipeline_parse_tree.snap @@ -188,4 +188,4 @@ expression: "parse_single(r#\"\nfrom employees\nfilter country == \"USA\" span: "0:711-713" span: "0:706-713" span: "0:1-713" - span: "0:1-714" + span: "0:0-713" diff --git a/prqlc/prqlc-parser/src/test.rs b/prqlc/prqlc-parser/src/test.rs index 2b92a7f5b00e..377e839587d4 100644 --- a/prqlc/prqlc-parser/src/test.rs +++ b/prqlc/prqlc-parser/src/test.rs @@ -2,6 +2,7 @@ use chumsky::Parser; use insta::{assert_debug_snapshot, assert_yaml_snapshot}; use std::fmt::Debug; +use crate::parser::new_line; use crate::parser::pr::Stmt; use crate::parser::prepare_stream; use crate::parser::stmt; @@ -27,10 +28,17 @@ pub(crate) fn parse_with_parser( } /// Parse into statements -fn parse_single(source: &str) -> Result, Vec> { +pub(crate) fn parse_single(source: &str) -> Result, Vec> { + // parse_with_parser(source, new_line().repeated().ignore_then(stmt::source())) parse_with_parser(source, stmt::source()) } +// TODO: move to expr singe stmts don't need it? +/// Remove leading newlines & the start token, for tests +pub(crate) fn trim_start() -> impl Parser { + new_line().repeated().ignored() +} + #[test] fn test_error_unicode_string() { // Test various unicode strings successfully parse errors. We were @@ -81,7 +89,7 @@ fn test_error_unexpected() { 0:6-7, ), reason: Simple( - "unexpected : while parsing function call", + "unexpected :", ), hints: [], code: None, @@ -365,7 +373,7 @@ fn test_basic_exprs() { - Ident: a span: "0:35-36" span: "0:28-36" - span: "0:28-36" + span: "0:0-36" "###); } @@ -543,7 +551,7 @@ fn test_function() { named_params: [] generic_type_params: [] span: "0:0-27" - span: "0:0-28" + span: "0:0-27" "###); assert_yaml_snapshot!(parse_single(r#"let count = X -> s"SUM({X})" @@ -635,7 +643,7 @@ fn test_function() { named_params: [] generic_type_params: [] span: "0:27-147" - span: "0:13-147" + span: "0:0-147" "###); assert_yaml_snapshot!(parse_single("let add = x to:a -> x + to\n").unwrap(), @r###" @@ -776,7 +784,7 @@ fn test_var_def() { SString: - String: SELECT * FROM employees span: "0:21-47" - span: "0:13-47" + span: "0:0-47" "###); assert_yaml_snapshot!(parse_single( @@ -828,7 +836,7 @@ fn test_var_def() { - Ident: x span: "0:101-102" span: "0:96-102" - span: "0:96-102" + span: "0:84-102" "###); } @@ -918,7 +926,7 @@ fn test_sql_parameters() { span: "0:37-107" span: "0:30-107" span: "0:9-107" - span: "0:9-108" + span: "0:0-107" "###); } @@ -1019,7 +1027,7 @@ join `my-proj`.`dataset`.`table` span: "0:118-126" span: "0:94-126" span: "0:1-126" - span: "0:1-127" + span: "0:0-126" "###); } @@ -1115,7 +1123,7 @@ fn test_sort() { span: "0:136-174" span: "0:131-174" span: "0:9-174" - span: "0:9-175" + span: "0:0-174" "###); } @@ -1162,7 +1170,7 @@ fn test_dates() { span: "0:39-76" span: "0:32-76" span: "0:9-76" - span: "0:9-77" + span: "0:0-76" "###); } @@ -1186,7 +1194,7 @@ fn test_multiline_string() { span: "0:20-36" alias: x span: "0:9-36" - span: "0:9-37" + span: "0:0-36" "### ) } @@ -1227,7 +1235,7 @@ derive x = 5 alias: x span: "0:14-26" span: "0:1-26" - span: "0:1-31" + span: "0:0-26" "### ) } @@ -1270,7 +1278,7 @@ fn test_coalesce() { alias: amount span: "0:32-59" span: "0:9-59" - span: "0:9-60" + span: "0:0-59" "### ) } @@ -1294,7 +1302,7 @@ fn test_literal() { span: "0:20-24" alias: x span: "0:9-24" - span: "0:9-25" + span: "0:0-24" "###) } @@ -1366,7 +1374,7 @@ fn test_allowed_idents() { span: "0:140-172" span: "0:133-172" span: "0:9-172" - span: "0:9-173" + span: "0:0-172" "###) } @@ -1459,7 +1467,7 @@ fn test_gt_lt_gte_lte() { span: "0:127-139" span: "0:120-139" span: "0:9-139" - span: "0:9-140" + span: "0:0-139" "###) } @@ -1500,7 +1508,7 @@ join s=salaries (==id) span: "0:33-37" span: "0:16-38" span: "0:1-38" - span: "0:1-39" + span: "0:0-38" "###); } @@ -1539,7 +1547,7 @@ fn test_var_defs() { value: Ident: x span: "0:17-42" - span: "0:9-42" + span: "0:0-42" "###); assert_yaml_snapshot!(parse_single(r#" @@ -1553,7 +1561,7 @@ fn test_var_defs() { value: Ident: x span: "0:9-10" - span: "0:9-25" + span: "0:0-25" "###); assert_yaml_snapshot!(parse_single(r#" @@ -1566,7 +1574,7 @@ fn test_var_defs() { value: Ident: x span: "0:9-10" - span: "0:9-11" + span: "0:0-10" "###); } @@ -1589,7 +1597,7 @@ fn test_array() { Integer: 2 span: "0:21-22" span: "0:17-24" - span: "0:9-24" + span: "0:0-24" - VarDef: kind: Let name: a @@ -1602,7 +1610,7 @@ fn test_array() { String: hello span: "0:49-56" span: "0:41-57" - span: "0:33-57" + span: "0:24-57" "###); } @@ -1637,7 +1645,7 @@ fn test_annotation() { named_params: [] generic_type_params: [] span: "0:49-61" - span: "0:9-61" + span: "0:0-61" annotations: - expr: Tuple: @@ -1649,7 +1657,8 @@ fn test_annotation() { "###); parse_single( r#" - @{binding_strength=1} let add = a b -> a + b + @{binding_strength=1} + let add = a b -> a + b "#, ) .unwrap(); @@ -1753,7 +1762,7 @@ fn test_target() { span: "0:72-77" span: "0:65-77" span: "0:45-77" - span: "0:45-78" + span: "0:34-77" "###); } @@ -1841,7 +1850,7 @@ fn doc_comment() { alias: x span: "0:22-34" span: "0:5-34" - span: "0:0-35" + span: "0:0-34" "###); assert_yaml_snapshot!(parse_single(r###" @@ -1863,15 +1872,24 @@ fn doc_comment() { args: - Ident: artists span: "0:10-17" - - Ident: derive - span: "0:51-57" - doc_comment: " This is a doc comment" + span: "0:5-17" + span: "0:0-17" + - VarDef: + kind: Main + name: main + value: + FuncCall: + name: + Ident: derive + span: "0:53-59" + args: - Literal: Integer: 5 - span: "0:62-63" + span: "0:64-65" alias: x - span: "0:5-63" - span: "0:5-64" + span: "0:53-65" + span: "0:47-65" + doc_comment: " This is a doc comment" "###); assert_yaml_snapshot!(parse_single(r###" @@ -1884,22 +1902,29 @@ fn doc_comment() { kind: Main name: main value: - FuncCall: - name: - Ident: from - span: "0:5-9" - args: - - Ident: artists - span: "0:10-17" - - Ident: derive - span: "0:51-57" - doc_comment: " This is a doc comment" - - Literal: - Integer: 5 - span: "0:62-63" - alias: x - span: "0:5-63" - span: "0:5-64" + Pipeline: + exprs: + - FuncCall: + name: + Ident: from + span: "0:34-38" + args: + - Ident: artists + span: "0:39-46" + span: "0:34-46" + - FuncCall: + name: + Ident: derive + span: "0:51-57" + args: + - Literal: + Integer: 5 + span: "0:62-63" + alias: x + span: "0:51-63" + span: "0:34-63" + span: "0:29-63" + doc_comment: " This is a doc comment" "###); assert_debug_snapshot!(parse_single(r###" @@ -1912,7 +1937,7 @@ fn doc_comment() { 0:18-42, ), reason: Simple( - "unexpected #! This is a doc comment\n while parsing function call", + "unexpected #! This is a doc comment\n", ), hints: [], code: None,