diff --git a/CHANGELOG.md b/CHANGELOG.md index e9f46c18..c9abcf29 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,45 @@ # Development version +- Autobracing is a new feature applied to if statements, for loops, while loops, + repeat loops, and function definitions. This feature will automatically add + `{}` around the body of these code elements in certain cases to maximize + readability, consistency, and portability (#225, #334). + + For example: + + ```r + if (condition) + a + + # Becomes: + if (condition) { + a + } + ``` + + ```r + fn <- function( + a, b + ) a + b + + # Becomes: + fn <- function( + a, + b + ) { + a + b + } + ``` + + Single line if statements and function definitions are still allowed in certain contexts: + + ```r + list(a = if (is.null(x)) NA else x) + + map(xs, function(x) x + 1) + ``` + # 0.5.0 diff --git a/crates/air_r_formatter/src/comments.rs b/crates/air_r_formatter/src/comments.rs index 11f36566..44147fc6 100644 --- a/crates/air_r_formatter/src/comments.rs +++ b/crates/air_r_formatter/src/comments.rs @@ -2,9 +2,13 @@ use crate::prelude::*; use air_r_syntax::AnyRExpression; use air_r_syntax::RArgument; use air_r_syntax::RArgumentNameClause; +use air_r_syntax::RElseClause; +use air_r_syntax::RForStatement; +use air_r_syntax::RFunctionDefinition; use air_r_syntax::RIfStatement; use air_r_syntax::RLanguage; use air_r_syntax::RParenthesizedExpression; +use air_r_syntax::RRepeatStatement; use air_r_syntax::RSyntaxKind; use air_r_syntax::RSyntaxToken; use air_r_syntax::RWhileStatement; @@ -16,6 +20,7 @@ use biome_formatter::comments::Comments; use biome_formatter::comments::DecoratedComment; use biome_formatter::comments::SourceComment; use biome_formatter::write; +use biome_rowan::AstNode; use biome_rowan::SyntaxTriviaPieceComments; use comments::Directive; use comments::FormatDirective; @@ -59,13 +64,13 @@ impl CommentStyle for RCommentStyle { &self, comment: DecoratedComment, ) -> CommentPlacement { - // TODO: Implement more rule based comment placement, see `biome_js_formatter` match comment.text_position() { CommentTextPosition::EndOfLine => handle_for_comment(comment) .or_else(handle_function_comment) .or_else(handle_while_comment) .or_else(handle_repeat_comment) .or_else(handle_if_statement_comment) + .or_else(handle_else_clause_comment) .or_else(handle_parenthesized_expression_comment) .or_else(handle_argument_name_clause_comment) .or_else(handle_argument_comment) @@ -75,6 +80,7 @@ impl CommentStyle for RCommentStyle { .or_else(handle_while_comment) .or_else(handle_repeat_comment) .or_else(handle_if_statement_comment) + .or_else(handle_else_clause_comment) .or_else(handle_parenthesized_expression_comment) .or_else(handle_argument_name_clause_comment) .or_else(handle_argument_comment) @@ -88,159 +94,572 @@ impl CommentStyle for RCommentStyle { } fn handle_for_comment(comment: DecoratedComment) -> CommentPlacement { - let enclosing = comment.enclosing_node(); - - if enclosing.kind() != RSyntaxKind::R_FOR_STATEMENT { + let Some(for_statement) = RForStatement::cast_ref(comment.enclosing_node()) else { return CommentPlacement::Default(comment); - } + }; - if comment.text_position().is_own_line() { - // Lift comment up as a leading comment on the whole `R_FOR_STATEMENT` node - return CommentPlacement::leading(enclosing.clone(), comment); + // If the following node is the `body`, we want to lead the first element of the body. + // But we only want to do this if we are past the `)` of the loop sequence, so we have + // to check that the following token isn't `)` too. + // + // + // ```r + // for (i in 1:5) # dangles {} + // { + // } + // ``` + // + // ```r + // for (i in 1:5) # leads `a` + // { + // a + // } + // ``` + // + // Particularly important for prepping for autobracing here + // + // ```r + // for (i in 1:5) # leads `a` + // a + // ``` + // + // This is consistent to when there are already braces there. + // + // ```r + // for (i in 1:5) { # leads `a` + // a + // } + // ``` + // + // Here the following node is the `body`, but the following token is `)`. We want + // to avoid stealing this comment. + // + // ```r + // for (i in 1:5 # comment + // ) { + // } + // ``` + if let Ok(body) = for_statement.body() { + if let Some(following) = comment.following_node() { + if let Some(following_token) = comment.following_token() { + if body.syntax() == following && following_token.kind() != RSyntaxKind::R_PAREN { + return place_leading_or_dangling_body_comment(body, comment); + } + } + } } - CommentPlacement::Default(comment) + // Otherwise if the comment is "enclosed" by the `for_statement` then it can only be + // in one of a few places, none of which are particularly meaningful, so we move the + // comment to lead the whole `for_statement` + // + // ```r + // for # comment + // (i in 1:5) { + // } + // ``` + // + // ```r + // for ( # comment + // i in 1:5) { + // } + // ``` + // + // ```r + // for (i # comment + // in 1:5) { + // } + // ``` + // + // ```r + // for (i in # comment + // 1:5) { + // } + // ``` + // + // ```r + // for (i in + // # comment + // 1:5) { + // } + // ``` + // + // ```r + // for (i in 1:5 # comment + // ) { + // } + // ``` + CommentPlacement::leading(for_statement.into_syntax(), comment) } fn handle_while_comment(comment: DecoratedComment) -> CommentPlacement { - let Some(enclosing) = RWhileStatement::cast_ref(comment.enclosing_node()) else { + let Some(while_statement) = RWhileStatement::cast_ref(comment.enclosing_node()) else { return CommentPlacement::Default(comment); }; - if let Some(preceding) = comment.preceding_node() { - // Make comments directly before the condition `)` trailing - // comments of the condition itself (rather than leading comments of - // the `body` node) - // - // ```r - // while ( - // cond - // # comment - // ) { - // } - // ``` - if comment - .following_token() - .is_some_and(|token| token.kind() == RSyntaxKind::R_PAREN) - { - return CommentPlacement::trailing(preceding.clone(), comment); + // If the following node is the `body`, we want to lead the first element of the body. + // But we only want to do this if we are past the `)` of the loop sequence, so we have + // to check that the following token isn't `)` too. + // + // + // ```r + // while (condition) # dangles {} + // { + // } + // ``` + // + // ```r + // while (condition) # leads `a` + // { + // a + // } + // ``` + // + // Particularly important for prepping for autobracing here + // + // ```r + // while (condition) # leads `a` + // a + // ``` + // + // This is consistent to when there are already braces there. + // + // ```r + // while (condition) { # leads `a` + // a + // } + // ``` + // + // Here the following node is the `body`, but the following token is `)`. We want + // to avoid stealing this comment. + // + // ```r + // while (condition # comment + // ) { + // } + // ``` + if let Ok(body) = while_statement.body() { + if let Some(following) = comment.following_node() { + if let Some(following_token) = comment.following_token() { + if body.syntax() == following && following_token.kind() != RSyntaxKind::R_PAREN { + return place_leading_or_dangling_body_comment(body, comment); + } + } } } - // Check that the `body` of the while loop is identical to the `following` - // node of the comment. While loops also have a `condition` that can be - // any R expression, so we need to differentiate here. - let Ok(body) = enclosing.body() else { - return CommentPlacement::Default(comment); - }; - let Some(following) = comment.following_node() else { - return CommentPlacement::Default(comment); - }; - if body.syntax() != following { - return CommentPlacement::Default(comment); - } - - // Handle cases like: + // Otherwise if the comment is "enclosed" by the `while_statement` then it can only be + // in one of a few places, none of which are particularly meaningful, so we move the + // comment to lead the whole `while_statement` // // ```r - // while (a) # comment - // {} + // while # comment + // (condition) { + // } // ``` - place_leading_or_dangling_body_comment(body, comment) + // + // ```r + // while ( # comment + // condition) { + // } + // ``` + // + // ```r + // while ( + // # comment + // condition) { + // } + // ``` + // + // ```r + // while (condition # comment + // ) { + // } + // ``` + CommentPlacement::leading(while_statement.into_syntax(), comment) } fn handle_repeat_comment(comment: DecoratedComment) -> CommentPlacement { - if !matches!( - comment.enclosing_node().kind(), - RSyntaxKind::R_REPEAT_STATEMENT - ) { - return CommentPlacement::Default(comment); - }; - - // Repeat statements have a `repeat` token and a `body` field, and - // only the `body` can be an `AnyRExpression`. - let Some(body) = comment.following_node().and_then(AnyRExpression::cast_ref) else { + let Some(repeat_statement) = RRepeatStatement::cast_ref(comment.enclosing_node()) else { return CommentPlacement::Default(comment); }; - // Handle cases like: + // If the comment is "enclosed" by the `repeat_statement` then it can only be + // in one of a few places, all of which should move the comment onto the first + // element of the `body` + // + // ```r + // repeat # dangles {} + // { + // } + // ``` + // + // ```r + // repeat + // # dangles {} + // { + // } + // ``` // // ```r - // repeat # comment - // {} + // repeat # leads `a` + // { + // a + // } // ``` - place_leading_or_dangling_body_comment(body, comment) + // + // Particularly important for prepping for autobracing here + // + // ```r + // repeat # leads `a` + // a + // ``` + // + // This is consistent to when there are already braces there. + // + // ```r + // repeat { # leads `a` + // a + // } + // ``` + if let Ok(body) = repeat_statement.body() { + return place_leading_or_dangling_body_comment(body, comment); + } + + // We don't expect to ever fall through to here, but if we do for some unknown reason + // then we make the comment lead the whole `repeat_statement` to be consistent with + // `for_statement` and `while_statement` + CommentPlacement::leading(repeat_statement.into_syntax(), comment) } fn handle_function_comment(comment: DecoratedComment) -> CommentPlacement { - if !matches!( - comment.enclosing_node().kind(), - RSyntaxKind::R_FUNCTION_DEFINITION - ) { + let Some(function_definition) = RFunctionDefinition::cast_ref(comment.enclosing_node()) else { return CommentPlacement::Default(comment); }; - // Function definitions have `name`, `parameters`, and `body` fields, and - // only the `body` can be an `AnyRExpression`. - let Some(body) = comment.following_node().and_then(AnyRExpression::cast_ref) else { - return CommentPlacement::Default(comment); + // If the following node is the `parameters`, we make the comment lead the whole + // `function_definition` node + // + // ```r + // function # becomes leading on everything + // () a + // ``` + // + // ```r + // function + // # becomes leading on everything + // () a + // ``` + if let Ok(parameters) = function_definition.parameters() { + if let Some(following) = comment.following_node() { + if parameters.syntax() == following { + return CommentPlacement::leading(function_definition.into_syntax(), comment); + } + }; + }; + + // If the following node is the `body`, we make the comment lead the first node of + // the `body`. + // + // ```r + // function() # becomes leading on `a` + // a + // ``` + // + // ```r + // function() # becomes leading on `a` + // { + // a + // } + // ``` + // + // This is consistent with what happens when the comment is already after an opening + // `{` + // + // ```r + // function() { # becomes leading on `a` + // a + // } + // ``` + if let Ok(body) = function_definition.body() { + if let Some(following) = comment.following_node() { + if body.syntax() == following { + return place_leading_or_dangling_body_comment(body, comment); + } + }; }; - place_leading_or_dangling_body_comment(body, comment) + CommentPlacement::Default(comment) } fn handle_if_statement_comment( comment: DecoratedComment, ) -> CommentPlacement { - match (comment.enclosing_node().kind(), comment.following_node()) { - (RSyntaxKind::R_IF_STATEMENT, Some(following)) => { - let if_statement = RIfStatement::unwrap_cast(comment.enclosing_node().clone()); - - if let Some(preceding) = comment.preceding_node() { - // Make comments directly before the condition `)` trailing - // comments of the condition itself - // - // ```r - // if ( - // cond - // # comment - // ) { - // } - // ``` - if comment - .following_token() - .is_some_and(|token| token.kind() == RSyntaxKind::R_PAREN) - { - return CommentPlacement::trailing(preceding.clone(), comment); - } - } + let Some(if_statement) = RIfStatement::cast_ref(comment.enclosing_node()) else { + return CommentPlacement::Default(comment); + }; - // Figure out if this is a comment that comes directly before the - // `consequence` and after the `)`, in which case we move it onto - // the `consequence` - // - // ```r - // if (cond) # comment - // TRUE - // ``` - if let Ok(consequence) = if_statement.consequence() { - if consequence.syntax() == following { - return place_leading_or_dangling_body_comment(consequence, comment); - } + let Ok(condition) = if_statement.condition() else { + // Bail with default placement if we don't have a `condition`, should + // be unexpected + return CommentPlacement::Default(comment); + }; + + let Ok(consequence) = if_statement.consequence() else { + // Bail with default placement if we don't have a `consequence`, should + // be unexpected + return CommentPlacement::Default(comment); + }; + + // Make comments directly after the `condition` trailing + // comments of the `condition` itself + // + // ```r + // if ( + // condition + // # comment + // ) { + // } + // ``` + // + // But don't steal comments that belong on the `consequence`, like + // below. Here the preceding node is the `condition`. To do this right, + // we also force the following token to be `)`. + // + // ```r + // if (condition) # comment + // consequence + // ``` + if let Some(preceding) = comment.preceding_node() { + if let Some(following_token) = comment.following_token() { + if condition.syntax() == preceding + && matches!(following_token.kind(), RSyntaxKind::R_PAREN) + { + return CommentPlacement::trailing(preceding.clone(), comment); } } - (RSyntaxKind::R_ELSE_CLAUSE, _) => { - // TODO: Handle else clause comments in some way? See JS for an example. - // fall through + } + + // Figure out if this is a comment that comes directly before the + // `consequence` and after the `)`, in which case we move it onto + // the `consequence` to prepare for autobracing + // + // ```r + // if (condition) # comment + // consequence + // ``` + // + // This aligns with behavior when braces already exist. Here, the comment's + // default placement makes it leading on `consequence` (the enclosing node is + // the braced expression, and there are no preceding nodes, so it attaches as leading + // on the following node). + // + // ```r + // if (condition) { # comment + // consequence + // } + // ``` + // + // It is important that we first check for comments after the `condition` + // but before the `)`, which we do above, otherwise this will steal this + // comment and place it on `consequence` when it really belongs to `condition` + // + // ```r + // if ( + // condition + // # comment + // ) { + // } + // ``` + if let Some(following) = comment.following_node() { + if consequence.syntax() == following { + return place_leading_or_dangling_body_comment(consequence, comment); + } + } + + // Move comments directly before an `else_clause` to the correct location + // + // Most `else` related handling is done by `handle_else_clause_comment()`, + // but when the comment is right before the `else` token, it is "enclosed" + // by the if statement node instead. We handle all of those cases here. + // + // We try extremely hard to force `} else` onto the same line with nothing + // between them, for maximum portability of the if/else statement. This + // requires moving the comment onto `consequence` or `alternative`. + // + // We greatly prefer creating leading comments over dangling comments when we move + // them, as they play much nicer with idempotence. + // + // ```r + // { + // if (cond) this # becomes leading on `this` + // else that + // } + // ``` + // + // ```r + // { + // if (cond) this + // # becomes leading on `that` + // else that + // } + // ``` + // + // ```r + // { + // if (cond) { + // this + // } # becomes leading on `this` + // else { + // that + // } + // } + // ``` + // + // ```r + // { + // if (cond) { + // + // } # becomes dangling on {} + // else { + // that + // } + // } + // ``` + // + // ```r + // { + // if (cond) { + // this + // } + // # becomes leading on `that` + // else { + // that + // } + // } + // ``` + // + // ```r + // { + // if (cond) { + // this + // } + // # becomes dangling on `{}` + // else { + // } + // } + // ``` + // + // ```r + // { + // if (cond) { + // this + // } + // # becomes leading on `that` + // else if (cond) { + // that + // } + // } + // ``` + if let Some(else_clause) = if_statement.else_clause() { + let Ok(alternative) = else_clause.alternative() else { + // Bail with default placement if we don't have a `alternative`, should + // be unexpected + return CommentPlacement::Default(comment); + }; + + if let Some(following) = comment.following_node() { + if else_clause.syntax() == following { + return match comment.text_position() { + // End of line comments lead the `consequence` body + CommentTextPosition::EndOfLine => { + place_leading_or_dangling_body_comment(consequence, comment) + } + // Own line comments lead the `alternative` body + CommentTextPosition::OwnLine => { + place_leading_or_dangling_alternative_comment(alternative, comment) + } + CommentTextPosition::SameLine => { + unreachable!("Inline comments aren't possible in R") + } + }; + } } - _ => { - // fall through + } + + CommentPlacement::Default(comment) +} + +fn handle_else_clause_comment(comment: DecoratedComment) -> CommentPlacement { + let Some(else_clause) = RElseClause::cast_ref(comment.enclosing_node()) else { + return CommentPlacement::Default(comment); + }; + + let Ok(alternative) = else_clause.alternative() else { + // Bail with default placement if we don't have a `alternative`, should + // be unexpected + return CommentPlacement::Default(comment); + }; + + // Comments following the `else` token but before the `alternative` are enclosed by + // the `else_clause`. We make these comments leading on the `alternative`. + // + // ```r + // { + // if (condition) a + // else # becomes leading on `b` + // { + // b + // } + // } + // ``` + // + // ```r + // { + // if (condition) a + // else + // # becomes leading on `b` + // { + // b + // } + // } + // ``` + if let Some(following) = comment.following_node() { + if alternative.syntax() == following { + return place_leading_or_dangling_alternative_comment(alternative, comment); } } CommentPlacement::Default(comment) } +/// Basically [place_leading_or_dangling_body_comment()], but moves comments on an if +/// statement `alternative` onto the body of that if statement to handle if chaining a bit +/// nicer +/// +/// ```r +/// { +/// if (condition) { +/// a +/// } else # becomes leading on `b` rather than leading on if statement `alternative` +/// if (condition) { +/// b +/// } +/// } +/// ``` +fn place_leading_or_dangling_alternative_comment( + alternative: AnyRExpression, + comment: DecoratedComment, +) -> CommentPlacement { + match alternative { + AnyRExpression::RIfStatement(node) => match node.consequence() { + Ok(consequence) => place_leading_or_dangling_body_comment(consequence, comment), + Err(_) => CommentPlacement::Default(comment), + }, + node => place_leading_or_dangling_body_comment(node, comment), + } +} + fn handle_parenthesized_expression_comment( comment: DecoratedComment, ) -> CommentPlacement { @@ -577,7 +996,7 @@ fn handle_hole_argument_comment( /// ``` /// /// ```r -/// if (cond) # becomes leading on `{}` +/// if (cond) # becomes dangling on `{}` /// { /// } /// ``` diff --git a/crates/air_r_formatter/src/lib.rs b/crates/air_r_formatter/src/lib.rs index fba98faf..dbef7efd 100644 --- a/crates/air_r_formatter/src/lib.rs +++ b/crates/air_r_formatter/src/lib.rs @@ -24,10 +24,10 @@ mod cst; pub mod either; pub mod formatter_ext; pub mod joiner_ext; +pub mod loop_body; mod prelude; mod r; pub(crate) mod separated; -mod statement_body; mod string_literal; #[rustfmt::skip] diff --git a/crates/air_r_formatter/src/loop_body.rs b/crates/air_r_formatter/src/loop_body.rs new file mode 100644 index 00000000..99919c94 --- /dev/null +++ b/crates/air_r_formatter/src/loop_body.rs @@ -0,0 +1,33 @@ +use crate::prelude::*; +use air_r_syntax::AnyRExpression; +use biome_formatter::format_args; +use biome_formatter::write; + +pub(crate) struct FormatLoopBody<'a> { + node: &'a AnyRExpression, +} + +impl<'a> FormatLoopBody<'a> { + pub fn new(node: &'a AnyRExpression) -> Self { + Self { node } + } +} + +impl Format for FormatLoopBody<'_> { + fn fmt(&self, f: &mut Formatter) -> FormatResult<()> { + if matches!(&self.node, AnyRExpression::RBracedExpressions(_)) { + // If it's already braced, just write it + write!(f, [self.node.format()]) + } else { + // Otherwise, brace it and block indent the body + write!( + f, + [ + text("{"), + block_indent(&format_args![&self.node.format()]), + text("}") + ] + ) + } + } +} diff --git a/crates/air_r_formatter/src/r/auxiliary/call_arguments.rs b/crates/air_r_formatter/src/r/auxiliary/call_arguments.rs index d68f9f81..73fad7df 100644 --- a/crates/air_r_formatter/src/r/auxiliary/call_arguments.rs +++ b/crates/air_r_formatter/src/r/auxiliary/call_arguments.rs @@ -123,10 +123,31 @@ impl RCallLikeArguments { /// /// Example: /// + /// An inline function without braces is an example where "most flat" is considered. + /// We consider this as groupable because it is possible that a long function body + /// or long function signature can cause a break, which may benefit from the middle + /// variant before expanding all the way to the most expanded variant. + /// /// ```r - /// # NOTE: Not currently possible, as the `{}` always force a break right now, - /// # but this would be an example if `{}` didn't force a break. - /// map(xs, function(x) {}) + /// # Most flat wins + /// map(xs, function(x) x + 1) + /// + /// # Middle variant wins + /// map(xs, function(x) x + a_really_long_thing_here + a_really_long_thing_there) + /// # -> + /// map(xs, function(x) { + /// x + a_really_long_thing_here + a_really_long_thing_there + /// }) + /// + /// # Most expanded wins (middle variant wouldn't fit within line length) + /// map(xs_are_extremely_long_too, function(this_argument_is_really_long, option = "so is this") this_argument_is_really_long) + /// # -> + /// map( + /// xs_are_extremely_long_too, + /// function(this_argument_is_really_long, option = "so is this") { + /// this_argument_is_really_long + /// } + /// ) /// ``` /// /// This variant is removed from the set if we detect that the grouped argument @@ -134,14 +155,28 @@ impl RCallLikeArguments { /// parameters, we bail entirely and use the most expanded form, as noted /// at the beginning of this documentation page). /// - /// Note that because `{}` currently unconditionally force a break, and because - /// we only go down this path when we have a `{}` to begin with, that means that - /// currently the most flat variant is always removed. There is an - /// `unreachable!()` in the code to assert this. We can't simply remove the - /// `most_flat` code path though, because it is also where we detect if a - /// parameter forces a break, triggering one of our early exists. Additionally, - /// in the future we may allow `{}` to not force a break, meaning this variant - /// may come back into play. + /// ```r + /// # Forced break in the body + /// map(xs, function(x) { + /// x + /// }) + /// + /// # This is a forced break in the body too, and triggers autobracing and + /// # results in the middle variant winning + /// map(xs, function(x) + /// x) + /// ``` + /// + /// Note that because `{}` currently unconditionally force a break, that means that + /// currently the most flat variant is always removed when `{}` are present. In the + /// future we will make empty `{}` never break and we will declare cases of empty + /// `{}` as not groupable, so it won't go through this best fitting process at all. + /// + /// ```r + /// # NOTE: Currently considered groupable, but the `{}` currently unconditionally + /// # force a break, so the `most_flat` variant is always removed + /// map(xs, function(x) {}) + /// ``` /// /// ```r /// # NOTE: We explicitly disallow curly-curly as a groupable argument, @@ -297,7 +332,7 @@ impl RCallLikeArguments { // Write the most flat variant with the first or last argument grouped // (but not forcibly expanded) - let _most_flat = { + let most_flat = { let snapshot = f.state_snapshot(); let mut buffer = VecBuffer::new(f.state_mut()); buffer.write_element(FormatElement::Tag(Tag::StartEntry))?; @@ -375,14 +410,15 @@ impl RCallLikeArguments { buffer.into_vec().into_boxed_slice() }; - // If the grouped content breaks, then we can skip the most_flat variant, - // since we already know that it won't be fitting on a single line. let variants = if grouped_breaks { + // If the grouped content breaks, then we can skip the most_flat variant, + // since we already know that it won't be fitting on a single line. We can + // also go ahead and signal that we will be expanding. write!(f, [expand_parent()])?; vec![middle_variant, most_expanded.into_boxed_slice()] } else { - unreachable!("`grouped_breaks` is currently always `true`."); - // vec![most_flat, middle_variant, most_expanded.into_boxed_slice()] + // Otherwise we best fit between all variants + vec![most_flat, middle_variant, most_expanded.into_boxed_slice()] }; // SAFETY: Safe because variants is guaranteed to contain >=2 entries: @@ -412,7 +448,8 @@ impl Format for RCallLikeArguments { // ) // ``` // - // If we don't handle it specially, it can break idempotence + // If we don't handle it specially, it can break idempotence. + // Same as `RParameters`. if items.is_empty() { return write!( f, @@ -1086,7 +1123,7 @@ fn argument_is_groupable(argument: &AnyRExpression) -> SyntaxResult { // }) // ``` // - // Empty braces always expand, so they benefit from grouping + // Empty braces currently always expand, so they benefit from grouping // // ```r // with(data, { @@ -1097,7 +1134,7 @@ fn argument_is_groupable(argument: &AnyRExpression) -> SyntaxResult { // // ```r // with(data, { - // // comment + // # comment // }) // ``` // @@ -1118,15 +1155,91 @@ fn argument_is_groupable(argument: &AnyRExpression) -> SyntaxResult { // ``` RBracedExpressions(node) => as_curly_curly(node).is_none(), + // Currently, every kind of function definition can benefit from grouping. + // + // - If the body has `{}` and is not empty, the braces will be expanded across + // multiple lines, so it can benefit from grouping. + // + // - If the body does not have `{}`, we may keep the function definition + // completely flat on one line, or we may expand it over multiple lines if + // it exceeds the line length or a persistent line break forces a break. If + // it were to break, it would benefit from grouping. + // + // - NOTE: If the body has `{}` and is empty (including no comments), then + // currently those are always expanded anyways, so it can benefit from grouping. + // In the future we will force empty `{}` to not expand, and when that happens + // we should return `false` from here to signal that it can't benefit from + // grouping (use the same rule as `RBracedExpressions` above, but applied to + // the `node.body()`). + // + // Here are some examples: + // + // Trailing function definition with braces is the classic example of something + // that benefits from grouping + // // ```r - // map(a, function(x) { + // map(xs, function(x) { // x // }) // ``` - RFunctionDefinition(node) => { - let body = node.body()?; - matches!(&body, AnyRExpression::RBracedExpressions(_)) - } + // + // When the braces are empty, it still benefits from grouping right now because + // we currently always expand the braces. + // + // ```r + // map(xs, function(x) { + // }) + // ``` + // + // With a dangling comment in empty braces, we always benefit from grouping + // + // ```r + // map(xs, function(x) { + // # comment + // }) + // ``` + // + // Long function definition that would be split over multiple lines, triggering + // autobracing, and would benefit from grouping + // + // ```r + // map(xs, function(x) x + a_really_really_long_name + another_really_long_name) + // + // # Becomes: + // map(xs, function(x) { + // x + a_really_really_long_name + another_really_long_name + // }) + // + // # Which is preferred over fully expanding: + // map( + // xs, + // function(x) { + // x + a_really_really_long_name + another_really_long_name + // } + // ) + // ``` + // + // Line break in the function definition body, triggering autobracing, and would + // benefit from grouping + // + // ```r + // map(xs, function(x) + // x + 1) + // + // # Becomes: + // map(xs, function(x) { + // x + 1 + // }) + // + // # Which is preferred over fully expanding: + // map( + // xs, + // function(x) { + // x + 1 + // } + // ) + // ``` + RFunctionDefinition(_) => true, // Nothing else benefits from grouping _ => false, diff --git a/crates/air_r_formatter/src/r/auxiliary/else_clause.rs b/crates/air_r_formatter/src/r/auxiliary/else_clause.rs index 04d85a5c..8b430792 100644 --- a/crates/air_r_formatter/src/r/auxiliary/else_clause.rs +++ b/crates/air_r_formatter/src/r/auxiliary/else_clause.rs @@ -1,30 +1,46 @@ use crate::prelude::*; -use crate::statement_body::FormatStatementBody; +use crate::r::auxiliary::if_statement::BracedExpressions; +use crate::r::auxiliary::if_statement::FormatIfBody; use air_r_syntax::RElseClause; use air_r_syntax::RElseClauseFields; use biome_formatter::write; +use biome_formatter::FormatRuleWithOptions; #[derive(Debug, Clone, Default)] -pub(crate) struct FormatRElseClause; +pub(crate) struct FormatRElseClause { + pub(crate) braced_expressions: BracedExpressions, +} + +#[derive(Debug)] +pub(crate) struct FormatRElseClauseOptions { + pub(crate) braced_expressions: BracedExpressions, +} + +impl FormatRuleWithOptions for FormatRElseClause { + type Options = FormatRElseClauseOptions; + + fn with_options(mut self, options: Self::Options) -> Self { + self.braced_expressions = options.braced_expressions; + self + } +} + impl FormatNodeRule for FormatRElseClause { fn fmt_fields(&self, node: &RElseClause, f: &mut RFormatter) -> FormatResult<()> { - use air_r_syntax::AnyRExpression::*; - let RElseClauseFields { else_token, alternative, } = node.as_fields(); + let else_token = else_token?; let alternative = alternative?; write!( f, [ else_token.format(), - group( - &FormatStatementBody::new(&alternative) - .with_forced_space(matches!(alternative, RIfStatement(_))) - ) + space(), + &FormatIfBody::new_alternative(&alternative, self.braced_expressions) ] ) } diff --git a/crates/air_r_formatter/src/r/auxiliary/for_statement.rs b/crates/air_r_formatter/src/r/auxiliary/for_statement.rs index 0eb61087..f0c45d04 100644 --- a/crates/air_r_formatter/src/r/auxiliary/for_statement.rs +++ b/crates/air_r_formatter/src/r/auxiliary/for_statement.rs @@ -1,5 +1,5 @@ +use crate::loop_body::FormatLoopBody; use crate::prelude::*; -use crate::statement_body::FormatStatementBody; use air_r_syntax::RForStatement; use air_r_syntax::RForStatementFields; use biome_formatter::format_args; @@ -31,7 +31,8 @@ impl FormatNodeRule for FormatRForStatement { space(), sequence.format(), r_paren_token.format(), - FormatStatementBody::new(&body?) + space(), + FormatLoopBody::new(&body?) ))] ) } diff --git a/crates/air_r_formatter/src/r/auxiliary/function_definition.rs b/crates/air_r_formatter/src/r/auxiliary/function_definition.rs index cb052f97..e95e1cd2 100644 --- a/crates/air_r_formatter/src/r/auxiliary/function_definition.rs +++ b/crates/air_r_formatter/src/r/auxiliary/function_definition.rs @@ -1,9 +1,11 @@ use crate::prelude::*; -use crate::statement_body::FormatStatementBody; +use air_r_syntax::AnyRExpression; use air_r_syntax::RFunctionDefinition; +use biome_formatter::format_args; use biome_formatter::write; use biome_formatter::FormatRuleWithOptions; use biome_formatter::RemoveSoftLinesBuffer; +use biome_rowan::SyntaxResult; use super::call_arguments::GroupedCallArgumentLayout; @@ -82,13 +84,76 @@ impl FormatFunction { Ok(()) }); + // The `group()` should contain all elements of the function definition, + // which allows `FormatFunctionBody` to autobrace if any element forces a + // break write!( f, - [ + [group(&format_args!( name.format(), &format_parameters, - group(&FormatStatementBody::new(&body)) - ] + space(), + FormatFunctionBody::new(&body) + ))] ) } } + +pub(crate) struct FormatFunctionBody<'a> { + node: &'a AnyRExpression, +} + +impl<'a> FormatFunctionBody<'a> { + pub fn new(node: &'a AnyRExpression) -> Self { + Self { node } + } +} + +impl Format for FormatFunctionBody<'_> { + fn fmt(&self, f: &mut Formatter) -> FormatResult<()> { + match self.node { + // Body already has braces, just format it + AnyRExpression::RBracedExpressions(node) => { + write!(f, [node.format()]) + } + // Body does not have braces yet + node => { + if should_force_braced_expressions(node)? { + write!( + f, + [ + text("{"), + block_indent(&format_args![&node.format()]), + text("}") + ] + ) + } else { + write!( + f, + [ + if_group_breaks(&text("{")), + soft_block_indent(&format_args![&node.format()]), + if_group_breaks(&text("}")) + ] + ) + } + } + } + } +} + +fn should_force_braced_expressions(node: &AnyRExpression) -> SyntaxResult { + // A kind of persistent line break, but not one we respect + // with the `persistent_line_breaks` setting, because it is + // more related to whether or not we autobrace + // + // ```r + // function() + // a + // ``` + if node.syntax().has_leading_newline() { + return Ok(true); + } + + Ok(false) +} diff --git a/crates/air_r_formatter/src/r/auxiliary/if_statement.rs b/crates/air_r_formatter/src/r/auxiliary/if_statement.rs index bfe659e7..1be8fc33 100644 --- a/crates/air_r_formatter/src/r/auxiliary/if_statement.rs +++ b/crates/air_r_formatter/src/r/auxiliary/if_statement.rs @@ -1,12 +1,35 @@ use crate::prelude::*; -use crate::statement_body::FormatStatementBody; +use crate::r::auxiliary::else_clause::FormatRElseClauseOptions; +use air_r_syntax::AnyRExpression; +use air_r_syntax::RExpressionList; use air_r_syntax::RIfStatement; use air_r_syntax::RIfStatementFields; +use air_r_syntax::RRoot; +use air_r_syntax::RSyntaxNode; use biome_formatter::format_args; use biome_formatter::write; +use biome_formatter::FormatRuleWithOptions; +use biome_rowan::SyntaxResult; #[derive(Debug, Clone, Default)] -pub(crate) struct FormatRIfStatement; +pub(crate) struct FormatRIfStatement { + pub(crate) braced_expressions: Option, +} + +#[derive(Debug)] +pub(crate) struct FormatRIfStatementOptions { + pub(crate) braced_expressions: Option, +} + +impl FormatRuleWithOptions for FormatRIfStatement { + type Options = FormatRIfStatementOptions; + + fn with_options(mut self, options: Self::Options) -> Self { + self.braced_expressions = options.braced_expressions; + self + } +} + impl FormatNodeRule for FormatRIfStatement { fn fmt_fields(&self, node: &RIfStatement, f: &mut RFormatter) -> FormatResult<()> { let RIfStatementFields { @@ -18,6 +41,33 @@ impl FormatNodeRule for FormatRIfStatement { else_clause, } = node.as_fields(); + // Compute `braced_expressions` if we are on a top level if statement, + // otherwise use the passed through `braced_expressions`, like if we are on the + // 2nd `if` statement of `if (condition) 1 else if (condition) 2`. + let braced_expressions = match self.braced_expressions { + Some(braced_expressions) => braced_expressions, + None => compute_braced_expressions(node)?, + }; + + // It's important that the `else_clause` end up in the same `group()` as the rest + // of the if statement, so we `format_once()` it to be evaluated once we are + // inside the `group()` + let else_clause = format_once(|f| { + if let Some(else_clause) = else_clause { + write!( + f, + [ + space(), + else_clause + .format() + .with_options(FormatRElseClauseOptions { braced_expressions }) + ] + )?; + } + + Ok(()) + }); + write!( f, [group(&format_args![ @@ -26,27 +76,341 @@ impl FormatNodeRule for FormatRIfStatement { l_paren_token.format(), group(&soft_block_indent(&condition.format())), r_paren_token.format(), - FormatStatementBody::new(&consequence?), + space(), + FormatIfBody::new_consequence(&consequence?, braced_expressions), + else_clause ])] - )?; - - // TODO: See more complex else handling (especially with comments) - // in `if_statement.rs` for JS - // Be careful about top level if statements. `else` has to be on the - // same line as the end of `consequence` to parse correctly! - if let Some(else_clause) = else_clause { - let else_on_same_line = true; - // let else_on_same_line = matches!(consequent, RBlockStatement(_)); - - if else_on_same_line { - write!(f, [space()])?; - } else { - write!(f, [hard_line_break()])?; + ) + } +} + +/// Determine if braced expressions should be forced within this if statement +/// +/// We must decide if the if statement is simple enough to potentially stay on a single +/// line: +/// - It must be in a [SyntaxPosition::Value] position +/// - Any existing newline forces multiline +/// - A braced `consequence` or `alternative` forces multiline +/// - Nested if statements force multiline +/// +/// That ends up resulting in the following scenarios: +/// +/// ## The if statement is not in a [SyntaxPosition::Value] position +/// +/// ```r +/// # Before (top level, considered an `Effect`) +/// if (cond) consequence else alternative +/// +/// # After +/// if (cond) { +/// consequence +/// } else { +/// alternative +/// } +/// ``` +/// +/// ## The `consequence` or `alternative` has a leading newline +/// +/// ```r +/// # Before +/// x <- if (cond) +/// consequence +/// +/// x <- if (cond) consequence else +/// alternative +/// +/// # After +/// x <- if (cond) { +/// consequence +/// } +/// +/// x <- if (cond) { +/// consequence +/// } else { +/// alternative +/// } +/// ``` +/// +/// ## The `else` token has a leading newline +/// +/// ```r +/// # Before +/// { +/// if (condition) 1 +/// else 2 +/// } +/// +/// # After +/// { +/// if (condition) { +/// 1 +/// } else { +/// 2 +/// } +/// } +/// ``` +/// +/// ## The `consequence` or `alternative` has braced expressions +/// +/// ```r +/// # Before +/// x <- if (cond) { consequence } else alternative +/// x <- if (cond) consequence else { alternative } +/// +/// # After +/// x <- if (cond) { +/// consequence +/// } else { +/// alternative +/// } +/// ``` +/// +/// ## The `consequence` or `alternative` is another if statement +/// +/// ```r +/// # Before +/// x <- if (cond) if (cond) consequence +/// # |----consequence----| +/// +/// # After +/// # Note that we don't `Force` the inner if to be braced, because short +/// # ifs would be allowed there if the user wants to write it like that to begin with. +/// x <- if (cond) { +/// if (cond) consequence +/// } +/// +/// # Before +/// x <- if (cond) consequence1 else if (cond) consequence2 +/// # |-----alternative----| +/// +/// # After +/// x <- if (cond) { +/// consequence1 +/// } else if (cond) { +/// consequence2 +/// } +/// ``` +fn compute_braced_expressions(node: &RIfStatement) -> SyntaxResult { + if node.syntax().position() != SyntaxPosition::Value { + return Ok(BracedExpressions::Force); + } + + let consequence = node.consequence()?; + + if consequence.syntax().has_leading_newline() { + return Ok(BracedExpressions::Force); + } + if matches!(consequence, AnyRExpression::RBracedExpressions(_)) { + return Ok(BracedExpressions::Force); + } + if matches!(consequence, AnyRExpression::RIfStatement(_)) { + // Disallow `if (condition) if (condition) 1` as that is too complex. + // Also shortcircuits recursion nicely. + // Notably we don't pass through `Force` to the inner if statement as well, + // it gets to compute its own `BracedExpressions` value. + return Ok(BracedExpressions::Force); + } + + if let Some(else_clause) = node.else_clause() { + let else_token = else_clause.else_token()?; + + if else_token.has_leading_newline() { + return Ok(BracedExpressions::Force); + } + + let alternative = else_clause.alternative()?; + + if alternative.syntax().has_leading_newline() { + return Ok(BracedExpressions::Force); + } + if matches!(alternative, AnyRExpression::RBracedExpressions(_)) { + return Ok(BracedExpressions::Force); + } + if matches!(alternative, AnyRExpression::RIfStatement(_)) { + // Disallow `if (condition) 1 else if (condition) 2 else 3` as that is too complex. + // Also shortcircuits recursion nicely. + return Ok(BracedExpressions::Force); + } + }; + + Ok(BracedExpressions::IfGroupBreaks) +} + +pub(crate) struct FormatIfBody<'a> { + node: &'a AnyRExpression, + braced_expressions: BracedExpressions, + if_body_kind: IfBodyKind, +} + +impl<'a> FormatIfBody<'a> { + pub(crate) fn new_consequence( + node: &'a AnyRExpression, + braced_expressions: BracedExpressions, + ) -> Self { + Self { + node, + braced_expressions, + if_body_kind: IfBodyKind::Consequence, + } + } + + pub(crate) fn new_alternative( + node: &'a AnyRExpression, + braced_expressions: BracedExpressions, + ) -> Self { + Self { + node, + braced_expressions, + if_body_kind: IfBodyKind::Alternative, + } + } +} + +#[derive(Debug, Copy, Clone, Default)] +pub(crate) enum BracedExpressions { + Force, + #[default] + IfGroupBreaks, +} + +pub(crate) enum IfBodyKind { + /// The body attached to `if (condition)` + Consequence, + /// The body attached to `else` + Alternative, +} + +impl Format for FormatIfBody<'_> { + fn fmt(&self, f: &mut Formatter) -> FormatResult<()> { + match self.node { + // Body already has braces, just format it + AnyRExpression::RBracedExpressions(node) => { + write!(f, [node.format()]) + } + // Body is an `alternative` that is another if statement, pass through + // pre computed `braced_expressions` (will be `BracedExpressions::Force`) + // + // ```r + // if (TRUE) 1 else if (TRUE) 2 + // # |-----------| + // ``` + AnyRExpression::RIfStatement(node) + if matches!(self.if_body_kind, IfBodyKind::Alternative) => + { + debug_assert!(matches!(self.braced_expressions, BracedExpressions::Force)); + write!( + f, + [node.format().with_options(FormatRIfStatementOptions { + braced_expressions: Some(self.braced_expressions) + })] + ) } + // Body does not have braces yet + node => match self.braced_expressions { + BracedExpressions::Force => write!( + f, + [ + text("{"), + block_indent(&format_args![&node.format()]), + text("}") + ] + ), + BracedExpressions::IfGroupBreaks => write!( + f, + [ + if_group_breaks(&text("{")), + soft_block_indent(&format_args![&node.format()]), + if_group_breaks(&text("}")) + ] + ), + }, + } + } +} - write!(f, [else_clause.format()])?; +// NOTE: If this ends up being useful for more than if statements, we can move it to +// `air_r_syntax` as `syntax_node_ext.rs` +trait SyntaxPositionExt { + /// Compute the [SyntaxPosition] of a node + /// + /// [SyntaxPosition] is an attempt to take a very simple syntax based approach to + /// determine whether a node is in a _value_ or an _effect_ position. + /// + /// - [SyntaxPosition::Value]s are assigned, are used as inputs to functions, and + /// are returned from `RBracedExpression` scopes. + /// + /// - [SyntaxPosition::Effect]s are statements that are typically used for their side + /// effects, such as calling `cat()`, or performing an assignment like `x <- 5`. + /// + /// An [SyntaxPosition::Effect] is either: + /// + /// - A direct child of `RRoot` + /// - A direct child of `RBracedExpression`, with the exception of the last child, + /// which is considered a [SyntaxPosition::Value], as it is the returned value + /// of the braced expression + /// + /// Everything else is considered a [SyntaxPosition::Value]. + /// + /// ## Examples + /// + /// At top level + /// + /// ```r + /// 1 + 1 # Effect + /// ``` + /// + /// Direct children of `{}` + /// + /// ```r + /// fn <- function() { + /// fn() # Effect + /// cat("hi") # Effect + /// fn() # Value, last child of `{}` + /// } + /// ``` + /// + /// Additional examples of [SyntaxPosition::Value] positions + /// + /// ```r + /// x <- + /// fun() + /// function(x = ) + /// map(xs, function() ) + /// ``` + fn position(&self) -> SyntaxPosition; +} + +#[derive(Debug, Clone, Copy, PartialEq)] +enum SyntaxPosition { + Value, + Effect, +} + +impl SyntaxPositionExt for RSyntaxNode { + fn position(&self) -> SyntaxPosition { + let Some(expressions) = self.parent().and_then(RExpressionList::cast) else { + // If our direct parent isn't an `RExpressionList`, we are definitely a `Value` + return SyntaxPosition::Value; + }; + + // Direct child of `RRoot`, this is an `Effect` + if expressions.parent::().is_some() { + return SyntaxPosition::Effect; } - Ok(()) + // Otherwise, direct child of `RBracedExpressions`. If this is the last + // expression, it is a `Value` (the value returned from a scope), otherwise it is + // an `Effect`. + let Some(last) = expressions.last() else { + // Would be unexpected since we just checked that `self` is a child of this, + // but we'd rather not crash if we are wrong + return SyntaxPosition::Value; + }; + + if self == last.syntax() { + SyntaxPosition::Value + } else { + SyntaxPosition::Effect + } } } diff --git a/crates/air_r_formatter/src/r/auxiliary/parameters.rs b/crates/air_r_formatter/src/r/auxiliary/parameters.rs index fb2d6cf3..c9065ee2 100644 --- a/crates/air_r_formatter/src/r/auxiliary/parameters.rs +++ b/crates/air_r_formatter/src/r/auxiliary/parameters.rs @@ -16,6 +16,27 @@ impl FormatNodeRule for FormatRParameters { r_paren_token, } = node.as_fields(); + // Special case where the dangling comment has no node to attach to: + // + // ```r + // function( + // # dangling comment + // ) {} + // ``` + // + // If we don't handle it specially, it can break idempotence. + // Same as `RCallLikeArguments`. + if items.is_empty() { + return write!( + f, + [ + l_paren_token.format(), + format_dangling_comments(node.syntax()).with_soft_block_indent(), + r_paren_token.format() + ] + ); + } + write!( f, [group(&format_args![ @@ -26,6 +47,12 @@ impl FormatNodeRule for FormatRParameters { .should_expand(has_persistent_line_break(&items, f.options()))] ) } + + fn fmt_dangling_comments(&self, _: &RParameters, _: &mut RFormatter) -> FormatResult<()> { + // Formatted inside of `fmt_fields` + // Only applicable for the empty arguments case + Ok(()) + } } /// Check if the user has inserted a persistent line break before the very first `parameter`. diff --git a/crates/air_r_formatter/src/r/auxiliary/repeat_statement.rs b/crates/air_r_formatter/src/r/auxiliary/repeat_statement.rs index 188a4149..9522380c 100644 --- a/crates/air_r_formatter/src/r/auxiliary/repeat_statement.rs +++ b/crates/air_r_formatter/src/r/auxiliary/repeat_statement.rs @@ -1,5 +1,5 @@ +use crate::loop_body::FormatLoopBody; use crate::prelude::*; -use crate::statement_body::FormatStatementBody; use air_r_syntax::RRepeatStatement; use air_r_syntax::RRepeatStatementFields; use biome_formatter::format_args; @@ -15,7 +15,8 @@ impl FormatNodeRule for FormatRRepeatStatement { f, [group(&format_args!( repeat_token.format(), - FormatStatementBody::new(&body?) + space(), + FormatLoopBody::new(&body?) ))] ) } diff --git a/crates/air_r_formatter/src/r/auxiliary/while_statement.rs b/crates/air_r_formatter/src/r/auxiliary/while_statement.rs index 8b200808..b7060114 100644 --- a/crates/air_r_formatter/src/r/auxiliary/while_statement.rs +++ b/crates/air_r_formatter/src/r/auxiliary/while_statement.rs @@ -1,5 +1,5 @@ +use crate::loop_body::FormatLoopBody; use crate::prelude::*; -use crate::statement_body::FormatStatementBody; use air_r_syntax::RWhileStatement; use air_r_syntax::RWhileStatementFields; use biome_formatter::format_args; @@ -25,7 +25,8 @@ impl FormatNodeRule for FormatRWhileStatement { l_paren_token.format(), group(&soft_block_indent(&condition.format())), r_paren_token.format(), - FormatStatementBody::new(&body?) + space(), + FormatLoopBody::new(&body?) ))] ) } diff --git a/crates/air_r_formatter/src/statement_body.rs b/crates/air_r_formatter/src/statement_body.rs deleted file mode 100644 index d5ce6e46..00000000 --- a/crates/air_r_formatter/src/statement_body.rs +++ /dev/null @@ -1,48 +0,0 @@ -use crate::prelude::*; -use air_r_syntax::AnyRExpression; -use biome_formatter::write; - -pub(crate) struct FormatStatementBody<'a> { - body: &'a AnyRExpression, - force_space: bool, -} - -impl<'a> FormatStatementBody<'a> { - pub fn new(body: &'a AnyRExpression) -> Self { - Self { - body, - force_space: false, - } - } - - /// For `if () 1 else if () 2` scenarios, ensures the second `if` is started - /// on the same line as the `else` (rather than line broken) and is - /// separated from the `else` by a single space - pub fn with_forced_space(mut self, value: bool) -> Self { - self.force_space = value; - self - } -} - -impl Format for FormatStatementBody<'_> { - fn fmt(&self, f: &mut Formatter) -> FormatResult<()> { - use AnyRExpression::*; - - // We only want a single space between the construct and the statement - // body if the body is a braced expression. The expression will handle - // the line break as required. Otherwise, we handle the soft line - // indent for all other syntax. - // - // if (a) {} - // |--| statement body - // - // if (a) - // a - // |--| statement body - if matches!(&self.body, RBracedExpressions(_)) || self.force_space { - write!(f, [space(), self.body.format()]) - } else { - write!(f, [soft_line_indent_or_space(&self.body.format())]) - } - } -} diff --git a/crates/air_r_formatter/tests/specs/r/call.R b/crates/air_r_formatter/tests/specs/r/call.R index 8b215a43..1bc70d56 100644 --- a/crates/air_r_formatter/tests/specs/r/call.R +++ b/crates/air_r_formatter/tests/specs/r/call.R @@ -340,23 +340,78 @@ map(xs, function(x) { # Braces expand over multiple lines map(xs, function(x) { }) -# This should stay where it is +# Best fitting is used to choose the most flat variant, and this stays +# as is map(xs, function(x) x) -# This form is too wide, so it fully expands -map(my_long_list_my_long_list_my_long_list_my_long_list, function(my_long_argument) { +# Best fitting is used to choose the most expanded variant over +# the current middle variant, because the middle variant exceeds +# the line length +map(my_long_list_my_long_list_my_long_list_my_long_list_my_long_list_my_long_list, function(my_long_argument) { my_long_body_my_long_body_my_long_body_my_long_body_my_long_body }) -# Parameter names are very long, so it fully expands -# (Note that this uses best-fitting. The `parameters` themselves don't force a -# break, but when a best-fit choice is made between this form with no -# soft-indents allowed in the `parameters` and the fully expanded form, the -# fully expanded form wins) +# Best fitting is used to choose the most expanded variant over +# the current middle variant. Remember no soft-indents are allowed in the +# `parameters` when looking at options for the middle variant, so the middle +# variant can't choose to put each parameter on its own line in the current +# form. Only the most expanded form can do that when fully breaking everything. map(x, function(a, a_really_really_long_parameter, and_another_one_here_too_wow_this_is_long) { 1 }) +# Best fitting is used to choose most expanded here. Even with the middle +# variant, the first map() argument and the function signature would exceed the +# line length. Autobracing DOES NOT kick in as it expands in this case. +map(my_long_list_my_long_list_my_long_list_my_long_list, function(my_long_argument) my_long_argument) + +# Best fitting is used to choose most expanded here. Even with the middle +# variant, the first map() argument and the function signature would exceed the +# line length. Autobracing DOES kick in as it expands in this case as it fully +# expands the function definition too to keep it within the line length. +map(my_long_list_my_long_list_my_long_list_my_long_list, function(my_long_argument) my_long_list_my_long_list_my_long_list_my_long_list_my_long_list) + +# Best fitting is used to choose the middle variant over the current most flat +# form. The middle variant fits within the line length, so is preferred over +# the most expanded form. +map(xs, function(my_long_argument) my_long_argument + my_extra_long_extra_argument) + +# Best fitting is used to choose the middle variant. The line break forces +# autobracing, which means the function definition breaks. That rules out +# the most flat variant, and then the middle variant is preferred over most +# expanded because it fits in the line length. +map(xs, function(x, option = "a") + x +) + +# Best fitting is attempted, but we bail early because the persistent line +# break causes the function definition `parameters` to expand, and if that +# happens we fully expand +map(xs, function( + x, option = "a") { + x +}) + +# Best fitting is not used here. The `xs` already has a line break, so +# we stay fully expanded. The function parameters are flattened though. +map( + xs, + function(x, + option = "a" + ) { + x + } +) + +# Best fitting is used to choose the middle variant. This is not a persistent +# line break location, so the function `parameters` don't actually break here. +# And then we choose middle over most expanded because it fits within the line +# length. +map(xs, function(x, + option = "a") { + x +}) + # The `{ 1 }` parameter would force a hard line break. We detect this and don't # use best-fitting. Instead we fall back to the most expanded form. map(x, function(a = { 1 }) { diff --git a/crates/air_r_formatter/tests/specs/r/call.R.snap b/crates/air_r_formatter/tests/specs/r/call.R.snap index af32020a..6238ff37 100644 --- a/crates/air_r_formatter/tests/specs/r/call.R.snap +++ b/crates/air_r_formatter/tests/specs/r/call.R.snap @@ -347,23 +347,78 @@ map(xs, function(x) { # Braces expand over multiple lines map(xs, function(x) { }) -# This should stay where it is +# Best fitting is used to choose the most flat variant, and this stays +# as is map(xs, function(x) x) -# This form is too wide, so it fully expands -map(my_long_list_my_long_list_my_long_list_my_long_list, function(my_long_argument) { +# Best fitting is used to choose the most expanded variant over +# the current middle variant, because the middle variant exceeds +# the line length +map(my_long_list_my_long_list_my_long_list_my_long_list_my_long_list_my_long_list, function(my_long_argument) { my_long_body_my_long_body_my_long_body_my_long_body_my_long_body }) -# Parameter names are very long, so it fully expands -# (Note that this uses best-fitting. The `parameters` themselves don't force a -# break, but when a best-fit choice is made between this form with no -# soft-indents allowed in the `parameters` and the fully expanded form, the -# fully expanded form wins) +# Best fitting is used to choose the most expanded variant over +# the current middle variant. Remember no soft-indents are allowed in the +# `parameters` when looking at options for the middle variant, so the middle +# variant can't choose to put each parameter on its own line in the current +# form. Only the most expanded form can do that when fully breaking everything. map(x, function(a, a_really_really_long_parameter, and_another_one_here_too_wow_this_is_long) { 1 }) +# Best fitting is used to choose most expanded here. Even with the middle +# variant, the first map() argument and the function signature would exceed the +# line length. Autobracing DOES NOT kick in as it expands in this case. +map(my_long_list_my_long_list_my_long_list_my_long_list, function(my_long_argument) my_long_argument) + +# Best fitting is used to choose most expanded here. Even with the middle +# variant, the first map() argument and the function signature would exceed the +# line length. Autobracing DOES kick in as it expands in this case as it fully +# expands the function definition too to keep it within the line length. +map(my_long_list_my_long_list_my_long_list_my_long_list, function(my_long_argument) my_long_list_my_long_list_my_long_list_my_long_list_my_long_list) + +# Best fitting is used to choose the middle variant over the current most flat +# form. The middle variant fits within the line length, so is preferred over +# the most expanded form. +map(xs, function(my_long_argument) my_long_argument + my_extra_long_extra_argument) + +# Best fitting is used to choose the middle variant. The line break forces +# autobracing, which means the function definition breaks. That rules out +# the most flat variant, and then the middle variant is preferred over most +# expanded because it fits in the line length. +map(xs, function(x, option = "a") + x +) + +# Best fitting is attempted, but we bail early because the persistent line +# break causes the function definition `parameters` to expand, and if that +# happens we fully expand +map(xs, function( + x, option = "a") { + x +}) + +# Best fitting is not used here. The `xs` already has a line break, so +# we stay fully expanded. The function parameters are flattened though. +map( + xs, + function(x, + option = "a" + ) { + x + } +) + +# Best fitting is used to choose the middle variant. This is not a persistent +# line break location, so the function `parameters` don't actually break here. +# And then we choose middle over most expanded because it fits within the line +# length. +map(xs, function(x, + option = "a") { + x +}) + # The `{ 1 }` parameter would force a hard line break. We detect this and don't # use best-fitting. Instead we fall back to the most expanded form. map(x, function(a = { 1 }) { @@ -1187,22 +1242,25 @@ map(xs, function(x) { map(xs, function(x) { }) -# This should stay where it is +# Best fitting is used to choose the most flat variant, and this stays +# as is map(xs, function(x) x) -# This form is too wide, so it fully expands +# Best fitting is used to choose the most expanded variant over +# the current middle variant, because the middle variant exceeds +# the line length map( - my_long_list_my_long_list_my_long_list_my_long_list, + my_long_list_my_long_list_my_long_list_my_long_list_my_long_list_my_long_list, function(my_long_argument) { my_long_body_my_long_body_my_long_body_my_long_body_my_long_body } ) -# Parameter names are very long, so it fully expands -# (Note that this uses best-fitting. The `parameters` themselves don't force a -# break, but when a best-fit choice is made between this form with no -# soft-indents allowed in the `parameters` and the fully expanded form, the -# fully expanded form wins) +# Best fitting is used to choose the most expanded variant over +# the current middle variant. Remember no soft-indents are allowed in the +# `parameters` when looking at options for the middle variant, so the middle +# variant can't choose to put each parameter on its own line in the current +# form. Only the most expanded form can do that when fully breaking everything. map( x, function( @@ -1214,6 +1272,70 @@ map( } ) +# Best fitting is used to choose most expanded here. Even with the middle +# variant, the first map() argument and the function signature would exceed the +# line length. Autobracing DOES NOT kick in as it expands in this case. +map( + my_long_list_my_long_list_my_long_list_my_long_list, + function(my_long_argument) my_long_argument +) + +# Best fitting is used to choose most expanded here. Even with the middle +# variant, the first map() argument and the function signature would exceed the +# line length. Autobracing DOES kick in as it expands in this case as it fully +# expands the function definition too to keep it within the line length. +map( + my_long_list_my_long_list_my_long_list_my_long_list, + function(my_long_argument) { + my_long_list_my_long_list_my_long_list_my_long_list_my_long_list + } +) + +# Best fitting is used to choose the middle variant over the current most flat +# form. The middle variant fits within the line length, so is preferred over +# the most expanded form. +map(xs, function(my_long_argument) { + my_long_argument + my_extra_long_extra_argument +}) + +# Best fitting is used to choose the middle variant. The line break forces +# autobracing, which means the function definition breaks. That rules out +# the most flat variant, and then the middle variant is preferred over most +# expanded because it fits in the line length. +map(xs, function(x, option = "a") { + x +}) + +# Best fitting is attempted, but we bail early because the persistent line +# break causes the function definition `parameters` to expand, and if that +# happens we fully expand +map( + xs, + function( + x, + option = "a" + ) { + x + } +) + +# Best fitting is not used here. The `xs` already has a line break, so +# we stay fully expanded. The function parameters are flattened though. +map( + xs, + function(x, option = "a") { + x + } +) + +# Best fitting is used to choose the middle variant. This is not a persistent +# line break location, so the function `parameters` don't actually break here. +# And then we choose middle over most expanded because it fits within the line +# length. +map(xs, function(x, option = "a") { + x +}) + # The `{ 1 }` parameter would force a hard line break. We detect this and don't # use best-fitting. Instead we fall back to the most expanded form. map( @@ -1689,8 +1811,8 @@ foo(bar[ # Lines exceeding max width of 80 characters ``` 307: my_long_list_my_long_list_my_long_list_my_long_list_long_long_long_long_long_list, - 701: foobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbazzzzzzzzzfoobarbaz - 705: c(list(foobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbazzzzzzzzzfoobarbaz())) - 708: c(list(foobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbazzzzzzzzzfoobarbaz( - 721: name = foobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbazzzzzzzzzfoobarbaz( + 768: foobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbazzzzzzzzzfoobarbaz + 772: c(list(foobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbazzzzzzzzzfoobarbaz())) + 775: c(list(foobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbazzzzzzzzzfoobarbaz( + 788: name = foobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbazzzzzzzzzfoobarbaz( ``` diff --git a/crates/air_r_formatter/tests/specs/r/directives/skip.R b/crates/air_r_formatter/tests/specs/r/directives/skip.R index 494075a8..3e362302 100644 --- a/crates/air_r_formatter/tests/specs/r/directives/skip.R +++ b/crates/air_r_formatter/tests/specs/r/directives/skip.R @@ -131,3 +131,9 @@ function(){ 1+1 2+2 } + +# ----------------------------------------------------------------------------- +# If statements and comments + +# fmt: skip +if (TRUE) 1 # hi diff --git a/crates/air_r_formatter/tests/specs/r/directives/skip.R.snap b/crates/air_r_formatter/tests/specs/r/directives/skip.R.snap index c91e9ef3..b92d34d6 100644 --- a/crates/air_r_formatter/tests/specs/r/directives/skip.R.snap +++ b/crates/air_r_formatter/tests/specs/r/directives/skip.R.snap @@ -139,6 +139,12 @@ function(){ 2+2 } +# ----------------------------------------------------------------------------- +# If statements and comments + +# fmt: skip +if (TRUE) 1 # hi + ``` @@ -291,4 +297,10 @@ function() { 1+1 2 + 2 } + +# ----------------------------------------------------------------------------- +# If statements and comments + +# fmt: skip +if (TRUE) 1 # hi ``` diff --git a/crates/air_r_formatter/tests/specs/r/for_statement.R b/crates/air_r_formatter/tests/specs/r/for_statement.R index bdd2356c..0eb90bdb 100644 --- a/crates/air_r_formatter/tests/specs/r/for_statement.R +++ b/crates/air_r_formatter/tests/specs/r/for_statement.R @@ -1,11 +1,75 @@ -for(x in y) x + y +for(x in xs) { + x + 1 +} -for(a_really_long_argument_name in but_we_dont_ever_break_inside_for_conditions_no_matter_how_long) 1 +for(a_really_long_argument_name in but_we_dont_ever_break_inside_for_conditions_no_matter_how_long) { + a_really_long_argument_name +} -# own-line comments get lifted up +# ------------------------------------------------------------------------------ +# Autobracing + +for(x in xs) {} + +# Unconditional autobracing on for loop bodies to maximize clarity and intent +for(x in xs) x +for(x in xs) x + y + +# ------------------------------------------------------------------------------ +# Comments + +for # leads for loop +(i in 1:5) { +} + +for ( # leads for loop +i in 1:5) { +} + +for (i # leads for loop +in 1:5) { +} + +for (i in # leads for loop +1:5) { +} + +for (i in +# leads for loop +1:5) { +} + +for (i in 1:5 # leads for loop +) { +} + +for (i in 1:5) # dangles {} + { + } + +for (i in 1:5) # leads a +{ + a +} + +for (i in 1:5) { # leads a + a +} + +for (i in 1:5) i # trails whole for loop + +for (i in 1:5) { i } # trails whole for loop + +# Comments 1-3 lead the whole for loop +# Comments 4-5 move to lead the body for ( # comment1 # comment2 a in 1 - ) # comment3 - a + # comment3 + ) # comment4 + # comment5 + { + # comment6 + a + } \ No newline at end of file diff --git a/crates/air_r_formatter/tests/specs/r/for_statement.R.snap b/crates/air_r_formatter/tests/specs/r/for_statement.R.snap index 570e2f39..fc3a86bb 100644 --- a/crates/air_r_formatter/tests/specs/r/for_statement.R.snap +++ b/crates/air_r_formatter/tests/specs/r/for_statement.R.snap @@ -5,18 +5,81 @@ info: r/for_statement.R # Input ```R -for(x in y) x + y +for(x in xs) { + x + 1 +} -for(a_really_long_argument_name in but_we_dont_ever_break_inside_for_conditions_no_matter_how_long) 1 +for(a_really_long_argument_name in but_we_dont_ever_break_inside_for_conditions_no_matter_how_long) { + a_really_long_argument_name +} -# own-line comments get lifted up +# ------------------------------------------------------------------------------ +# Autobracing + +for(x in xs) {} + +# Unconditional autobracing on for loop bodies to maximize clarity and intent +for(x in xs) x +for(x in xs) x + y + +# ------------------------------------------------------------------------------ +# Comments + +for # leads for loop +(i in 1:5) { +} + +for ( # leads for loop +i in 1:5) { +} + +for (i # leads for loop +in 1:5) { +} + +for (i in # leads for loop +1:5) { +} + +for (i in +# leads for loop +1:5) { +} + +for (i in 1:5 # leads for loop +) { +} + +for (i in 1:5) # dangles {} + { + } + +for (i in 1:5) # leads a +{ + a +} + +for (i in 1:5) { # leads a + a +} + +for (i in 1:5) i # trails whole for loop + +for (i in 1:5) { i } # trails whole for loop + +# Comments 1-3 lead the whole for loop +# Comments 4-5 move to lead the body for ( # comment1 # comment2 a in 1 - ) # comment3 - a - + # comment3 + ) # comment4 + # comment5 + { + # comment6 + a + } ``` @@ -36,19 +99,91 @@ Skip: None ----- ```R -for (x in y) x + y +for (x in xs) { + x + 1 +} + +for (a_really_long_argument_name in but_we_dont_ever_break_inside_for_conditions_no_matter_how_long) { + a_really_long_argument_name +} + +# ------------------------------------------------------------------------------ +# Autobracing + +for (x in xs) { +} + +# Unconditional autobracing on for loop bodies to maximize clarity and intent +for (x in xs) { + x +} +for (x in xs) { + x + y +} + +# ------------------------------------------------------------------------------ +# Comments + +# leads for loop +for (i in 1:5) { +} + +# leads for loop +for (i in 1:5) { +} + +# leads for loop +for (i in 1:5) { +} + +# leads for loop +for (i in 1:5) { +} + +# leads for loop +for (i in 1:5) { +} + +# leads for loop +for (i in 1:5) { +} + +for (i in 1:5) { + # dangles {} +} + +for (i in 1:5) { + # leads a + a +} + +for (i in 1:5) { + # leads a + a +} + +for (i in 1:5) { + i +} # trails whole for loop -for (a_really_long_argument_name in but_we_dont_ever_break_inside_for_conditions_no_matter_how_long) - 1 +for (i in 1:5) { + i +} # trails whole for loop -# own-line comments get lifted up +# Comments 1-3 lead the whole for loop +# Comments 4-5 move to lead the body # comment1 # comment2 -for (a in 1) # comment3 +# comment3 +for (a in 1) { + # comment4 + # comment5 + # comment6 a +} ``` # Lines exceeding max width of 80 characters ``` - 3: for (a_really_long_argument_name in but_we_dont_ever_break_inside_for_conditions_no_matter_how_long) + 5: for (a_really_long_argument_name in but_we_dont_ever_break_inside_for_conditions_no_matter_how_long) { ``` diff --git a/crates/air_r_formatter/tests/specs/r/function_definition.R b/crates/air_r_formatter/tests/specs/r/function_definition.R index 20063b80..7a796f1c 100644 --- a/crates/air_r_formatter/tests/specs/r/function_definition.R +++ b/crates/air_r_formatter/tests/specs/r/function_definition.R @@ -1,44 +1,129 @@ +# ------------------------------------------------------------------------ +# Miscellaneous + +function(a_really_long_argument_name_to_break_on, and_here_is_another_one_please_break_me, and_this) { + a +} + +function(a = { + 1 +}, b) { + 1 +} + +# ------------------------------------------------------------------------ +# Autobracing + +# These stay flat function() 1 function(a, b) 1 +\() 1 +\(a, b) 1 -function(a_really_long_argument_name_to_break_on, and_here_is_another_one_please_break_me, and_this) 1 +# Persistent line break in `parameters` triggers autobracing because the `body` +# is in the same `group()` +function( + a, b) 1 -function(a_really_long_argument_name_to_break_on, and_this) a_really_long_argument_name_to_break_on +\( + a, b) 1 -function(a = { +# Persistent line break in `parameters` triggers autobracing because the `body` +# is in the same `group()` +function( + a, b +) 1 + +# Persistent line break in `body` triggers autobracing +function(a, b) 1 -}, b) { + +\(a, b) 1 + +# This snaps back to one line +function +(a, b) 1 + +function(a_really_long_argument_name_to_break_on, and_here_is_another_one_please_break_me, and_this) a + +function(a_really_long_argument_name_to_break_on, and_this) a_really_long_argument_name_to_break_on + +# ------------------------------------------------------------------------ +# Comments + +function # leads function +() { +} + +function +# leads function +() { +} + +function( # dangles () +) { +} + +function( + # dangles () +) { } function() { - # comment + # dangles {} } -function() # becomes leading on `1 + 1` +function() a # trails function + +function() # leads `a` { - 1 + 1 + a } -function() # becomes leading on `1 + 1` +function() # leads `a` { # an inner comment - 1 + 1 + a } -function() # becomes dangling on the `{}` +function() # dangles {} { } -function() # becomes dangling on the `{}` +function() # dangles {} { # an inner comment but empty `{}` } -function() # becomes leading on `1 + 1` - 1 + 1 +function() # leads `a` + a + +# Not much we can do here, it's not enclosed by the `function_definition` node +# so it ends up trailing the `}` of the function. This is consistent with +# non-enclosed comments in if/else and loops. +function() + a # trails function -\(x, y) 1 +function( + # leads `a` + a +) { + # comment +} + +function( + a # trails `a` +) { + # comment +} + +function( + a + # trails `a` +) { + # comment +} # ------------------------------------------------------------------------ # User requested line break @@ -94,31 +179,3 @@ fn <- function(a, b = c( 1, 2, 3)) { body } - -# ------------------------------------------------------------------------ -# User requested line break and trailing anonymous functions in calls - -# Ensure these features play nicely together - -# This user line break expands the function definition, causing the whole -# `map()` to expand -map(xs, function( - x, option = "a") { - x -}) - -# This flattens the function definition, but the `map()` stays expanded -map( - xs, - function(x, - option = "a" - ) { - x - } -) - -# This flattens to one line -map(xs, function(x, - option = "a") { - x -}) diff --git a/crates/air_r_formatter/tests/specs/r/function_definition.R.snap b/crates/air_r_formatter/tests/specs/r/function_definition.R.snap index 526ab59a..3d9df5c8 100644 --- a/crates/air_r_formatter/tests/specs/r/function_definition.R.snap +++ b/crates/air_r_formatter/tests/specs/r/function_definition.R.snap @@ -5,47 +5,132 @@ info: r/function_definition.R # Input ```R +# ------------------------------------------------------------------------ +# Miscellaneous + +function(a_really_long_argument_name_to_break_on, and_here_is_another_one_please_break_me, and_this) { + a +} + +function(a = { + 1 +}, b) { + 1 +} + +# ------------------------------------------------------------------------ +# Autobracing + +# These stay flat function() 1 function(a, b) 1 +\() 1 +\(a, b) 1 -function(a_really_long_argument_name_to_break_on, and_here_is_another_one_please_break_me, and_this) 1 +# Persistent line break in `parameters` triggers autobracing because the `body` +# is in the same `group()` +function( + a, b) 1 -function(a_really_long_argument_name_to_break_on, and_this) a_really_long_argument_name_to_break_on +\( + a, b) 1 -function(a = { +# Persistent line break in `parameters` triggers autobracing because the `body` +# is in the same `group()` +function( + a, b +) 1 + +# Persistent line break in `body` triggers autobracing +function(a, b) 1 -}, b) { + +\(a, b) 1 + +# This snaps back to one line +function +(a, b) 1 + +function(a_really_long_argument_name_to_break_on, and_here_is_another_one_please_break_me, and_this) a + +function(a_really_long_argument_name_to_break_on, and_this) a_really_long_argument_name_to_break_on + +# ------------------------------------------------------------------------ +# Comments + +function # leads function +() { +} + +function +# leads function +() { +} + +function( # dangles () +) { +} + +function( + # dangles () +) { } function() { - # comment + # dangles {} } -function() # becomes leading on `1 + 1` +function() a # trails function + +function() # leads `a` { - 1 + 1 + a } -function() # becomes leading on `1 + 1` +function() # leads `a` { # an inner comment - 1 + 1 + a } -function() # becomes dangling on the `{}` +function() # dangles {} { } -function() # becomes dangling on the `{}` +function() # dangles {} { # an inner comment but empty `{}` } -function() # becomes leading on `1 + 1` - 1 + 1 +function() # leads `a` + a + +# Not much we can do here, it's not enclosed by the `function_definition` node +# so it ends up trailing the `}` of the function. This is consistent with +# non-enclosed comments in if/else and loops. +function() + a # trails function -\(x, y) 1 +function( + # leads `a` + a +) { + # comment +} + +function( + a # trails `a` +) { + # comment +} + +function( + a + # trails `a` +) { + # comment +} # ------------------------------------------------------------------------ # User requested line break @@ -102,34 +187,6 @@ fn <- function(a, b = c( body } -# ------------------------------------------------------------------------ -# User requested line break and trailing anonymous functions in calls - -# Ensure these features play nicely together - -# This user line break expands the function definition, causing the whole -# `map()` to expand -map(xs, function( - x, option = "a") { - x -}) - -# This flattens the function definition, but the `map()` stays expanded -map( - xs, - function(x, - option = "a" - ) { - x - } -) - -# This flattens to one line -map(xs, function(x, - option = "a") { - x -}) - ``` @@ -149,17 +206,16 @@ Skip: None ----- ```R -function() 1 -function(a, b) 1 +# ------------------------------------------------------------------------ +# Miscellaneous function( a_really_long_argument_name_to_break_on, and_here_is_another_one_please_break_me, and_this -) 1 - -function(a_really_long_argument_name_to_break_on, and_this) - a_really_long_argument_name_to_break_on +) { + a +} function( a = { @@ -170,35 +226,142 @@ function( 1 } +# ------------------------------------------------------------------------ +# Autobracing + +# These stay flat +function() 1 +function(a, b) 1 +\() 1 +\(a, b) 1 + +# Persistent line break in `parameters` triggers autobracing because the `body` +# is in the same `group()` +function( + a, + b +) { + 1 +} + +\( + a, + b +) { + 1 +} + +# Persistent line break in `parameters` triggers autobracing because the `body` +# is in the same `group()` +function( + a, + b +) { + 1 +} + +# Persistent line break in `body` triggers autobracing +function(a, b) { + 1 +} + +\(a, b) { + 1 +} + +# This snaps back to one line +function(a, b) 1 + +function( + a_really_long_argument_name_to_break_on, + and_here_is_another_one_please_break_me, + and_this +) { + a +} + +function(a_really_long_argument_name_to_break_on, and_this) { + a_really_long_argument_name_to_break_on +} + +# ------------------------------------------------------------------------ +# Comments + +# leads function function() { - # comment } +# leads function function() { - # becomes leading on `1 + 1` - 1 + 1 +} + +function( + # dangles () +) { +} + +function( + # dangles () +) { } function() { - # becomes leading on `1 + 1` + # dangles {} +} + +function() a # trails function + +function() { + # leads `a` + a +} + +function() { + # leads `a` # an inner comment - 1 + 1 + a } function() { - # becomes dangling on the `{}` + # dangles {} } function() { - # becomes dangling on the `{}` + # dangles {} # an inner comment but empty `{}` } -function() - # becomes leading on `1 + 1` - 1 + 1 +function() { + # leads `a` + a +} -\(x, y) 1 +# Not much we can do here, it's not enclosed by the `function_definition` node +# so it ends up trailing the `}` of the function. This is consistent with +# non-enclosed comments in if/else and loops. +function() { + a +} # trails function + +function( + # leads `a` + a +) { + # comment +} + +function( + a # trails `a` +) { + # comment +} + +function( + a + # trails `a` +) { + # comment +} # ------------------------------------------------------------------------ # User requested line break @@ -257,34 +420,4 @@ fn <- function( ) { body } - -# ------------------------------------------------------------------------ -# User requested line break and trailing anonymous functions in calls - -# Ensure these features play nicely together - -# This user line break expands the function definition, causing the whole -# `map()` to expand -map( - xs, - function( - x, - option = "a" - ) { - x - } -) - -# This flattens the function definition, but the `map()` stays expanded -map( - xs, - function(x, option = "a") { - x - } -) - -# This flattens to one line -map(xs, function(x, option = "a") { - x -}) ``` diff --git a/crates/air_r_formatter/tests/specs/r/if_statement.R b/crates/air_r_formatter/tests/specs/r/if_statement.R index 8d39734e..3b449c10 100644 --- a/crates/air_r_formatter/tests/specs/r/if_statement.R +++ b/crates/air_r_formatter/tests/specs/r/if_statement.R @@ -1,18 +1,15 @@ -if (a) 1 -if (a) 1 else 2 -if (a) 1 else if (b) 2 else 3 - -# Spacing test -if(a)1 else if(b)2 else 3 - -# Line break test -if (a_really_really_long_condition_here_that_is_allowed_to_break_onto_the_next_line) 1 else 2 +# --------------------------------------------------------------------------- +# Comments if (a) # becomes leading on `1 + 1` { 1 + 1 } +if (a) { # becomes leading on `1 + 1` + 1 + 1 +} + if (a) # becomes dangling on `{}` { } @@ -25,6 +22,10 @@ if (a) # becomes dangling on `{}` if (a) # becomes leading on `TRUE` TRUE +if +# becomes leading on `a` +(a) TRUE + if ( a # becomes trailing on `a` @@ -37,6 +38,255 @@ if (a # becomes trailing on `a` TRUE } +{ + if (condition) this # becomes leading on `this` + else that +} + +{ + if (condition) this + # becomes leading on `that` + else that +} + +# With `{ this }`, it's nice that this becomes leading on `this` +{ + if (condition) { this } # becomes leading on `this` + else that +} + +# With `{\n this\n}`, it's arguable whether this should lead +# `this` or lead `that`, but we keep things simple here to have +# one code path that also handles the `{ this }` case. +{ + if (condition) { + this + } # becomes leading on `this` + else that +} + +{ + if (condition) { + + } # becomes dangling on `{}` + else that +} + +{ + if (condition) { + this + } + # becomes leading on `that` + else that +} + +{ + if (condition) { + this + } + # becomes leading on `that` + else { + that + } +} + +{ + if (condition) { + this + } + # becomes dangling on `{}` + else { + + } +} + +{ + if (condition) { + this + } + # becomes leading on `that` + else if (condition) { + that + } +} + +{ + if (condition) { + this + } + # becomes leading on `that` + else if (condition) that +} + +{ + if (condition) { + this + } + # becomes dangling on `{}` + else if (condition) {} +} + +{ + if (condition) { + this + } + # becomes leading on `that` + # becomes leading on `that` part 2 + else if (condition) { + that + } +} + +{ + if (condition) a + else # becomes leading on `b` + b +} + +{ + if (condition) a + else # becomes leading on `b` + { + b + } +} + +if (condition) { + a +} else if (condition) { + b +} else # becomes leading on `c` +{ + c +} + +if (condition) # becomes leading on `a` +{ + a +} else if (condition) # becomes leading on `b` +{ + b +} else # becomes leading on `c` +{ + c +} + +# In general, greatly prefer creating leading comments on `a` rather than +# creating trailing comments on `a`, as it is much easier to handle from +# an idempotence point of view. +{ + if (condition) { + if (condition) a + } # becomes leading on `a`'s if statement + else { + b + } +} + +{ + if (condition) a # becomes leading on `a` + else if (condition) b +} + +{ + if (condition) { a } # becomes leading on `a` + else if (condition) b +} + +{ + if (condition) { + a + } # becomes leading on `a` + else if (condition) b +} + +# --------------------------------------------------------------------------- +# Comments - these comments aren't "enclosed" by the if statement, so they +# our outside our if statement comment handling hooks. Avoid the urge to +# try and attach these comments to the last node in the if statement, as +# that causes verbatim printing (with `fmt: skip`) to break, because biome's +# formatter expects that the trailing comment trails the outermost code element +# (here, the whole if statement, not the last node of it). We have `fmt: skip` +# related test elsewhere to ensure we don't break this. + +# These stay where they are, short one liner if statements in value position +# don't expand +x <- if (condition) a # becomes trailing on if statement +x <- if (condition) a else b # becomes trailing on if statement + +# These expand from being at top level in effect position +if (condition) a # becomes trailing on if statement +if (condition) a else b # becomes trailing on if statement + +if (condition) { a } # becomes trailing on if statement +if (condition) { a } else { b } # becomes trailing on if statement + +if (condition) a_really_really_long_name_here_that_forces_a_break_because_it_is_so_long # becomes trailing on if statement +if (condition) { a_really_really_long_name_here_that_forces_a_break_because_it_is_so_long } # becomes trailing on if statement + +if (condition) a_really_really_long_name_here else another_really_really_long_name # becomes trailing on if statement +if (condition) a_really_really_long_name_here else { another_really_really_long_name } # becomes trailing on if statement + +if (condition) a_really_really_long_name_here else if (condition2) another_really_really_long_name # becomes trailing on outer if statement +if (condition) a_really_really_long_name_here else if (condition2) { another_really_really_long_name } # becomes trailing on outer if statement + +if (condition) a_really_really_long_name_here else if (condition2) another_really_really_long_name else that_name # becomes trailing on outer if statement +if (condition) a_really_really_long_name_here else if (condition2) another_really_really_long_name else { that_name } # becomes trailing on outer if statement + +if (condition) + a # becomes trailing on if statement + +if (condition) # becomes leading on `a` + a # becomes trailing on if statement + +if (condition) { + a +} # becomes trailing on if statement + +# It would be nice to have consistent behavior, but it's basically impossible. +# Comment on `a` is enclosed by the if statement so our hooks handle it, but +# comment on `b` is not! +{ + if (condition) a # becomes leading on `a` + else b # becomes trailing on if statement +} + +{ + if (condition) a + else b # becomes trailing on if statement +} + +{ + if (condition) a + else { + b + } # becomes trailing on if statement +} + +{ + if (condition) a + else b + # Floating comment after the if statement +} + +# Nesting in `consequence` +x <- if (condition) if (condition2) this # becomes trailing on if statement +x <- if (condition) if (condition2) if (condition3) this # becomes trailing on if statement +x <- if (condition) if (condition2) this else that # becomes trailing on if statement +if (condition) if (condition2) this # becomes trailing on if statement +if (condition) if (condition2) if (condition3) this # becomes trailing on if statement +if (condition) if (condition2) this else that # becomes trailing on if statement + +# Nesting in `alternative` +x <- if (condition) this else that # stays flat, one liner if statement in value position +if (condition) this else that # becomes trailing on if statement +if (condition) this else if (condition2) that # becomes trailing on if statement +if (condition) this else if (condition2) this2 else that # becomes trailing on if statement +if (condition) this else if (condition2) this2 else if (condition3) that # becomes trailing on if statement + +# --------------------------------------------------------------------------- +# Special + # Breaks, but the `condition` itself fits and is not expanded if (this || this || this || this || this || this || this || this || this || this) { 1 @@ -49,3 +299,116 @@ if (this || this || this || this || this || this || this || this || this || this } else { 2 } + +# --------------------------------------------------------------------------- +# Auto bracing + +# Not allowed on one line without braces (effect position) +if (a) 1 +if (a) 1 else 2 +{ + if (a) 1 + 1 + 1 +} + +# Allowed on one line without braces (value position) +x = if (a) 1 +x <- if (a) 1 +x <<- if (a) 1 +x = if (a) 1 else 2 +x <- if (a) 1 else 2 +x <<- if (a) 1 else 2 +{ + if (a) 1 +} + +# Allowed on one line without braces (value position) +fn(if (a) 1) +fn(if (a) 1, if (a) 1 else 2) +fn(x = if (a) 1) +fn(if (a) 1, x = if (a) 1 else 2) +# Too complex, forces braces +fn(if (a) 1 else if (b) 2 else 3) +# Breaks argument list, but not if/else +fn(if (a) 1, x = if (a) 1 else 2, this_is_really_really_long_and_breaks_the_group_here) + +# Allowed on one line without braces (value position) +function(x = if (a) 1) {} +function(x = if (a) 1, y = if (a) 1 else 2) {} +# Too complex, forces braces +function(x = if (a) 1 else if (b) 2 else 3) {} +# Breaks parameter list, but not if/else +function(x = if (a) 1, y = if (a) 1 else 2, this_is_really_really_long_and_breaks_the_group_here) {} + +# The group breaking forces braces +x <- if (something_really_really_long_here_something_really_really_long_here) 1 else 2 +x <- if (a) something_really_really_long_here else and_something_really_really_long_here + +# The leading newline forces braces +x <- if (a) + 1 +x <- if (a) 1 else + 2 + +# The leading newline before `else` forces braces +{ + if (a) 1 + else 2 +} + +# The nested if in `consequence` forces braces around the outer if +x <- if (a) if (b) 1 +x <- if (a) if (b) if (c) 1 +x <- if (a) if (b) 1 else 2 + +# We don't force the inner if inside the `consequence` to have braces as +# well, because a user could have writen a short if in the inner branch to +# begin with (like the second example here) and that would have been valid. +x <- if (a) if (b) 1 +x <- if (a) { + if (b) 1 +} + +# The nested if in `alternative` forces braces +x <- if (a) 1 else if (b) 2 +x <- if (a) 1 else if (b) 2 else 3 + +# The braces on one piece force braces +x <- if (a) {1} else 2 +x <- if (a) {1} else {2} + +# Seen in dplyr, don't autobrace (value position) +map(xs, function(x) if (is_thing(x)) "this" else "that") + +# Seen in dplyr, don't autobrace (value position) +map(xs, function(x) { + if (is_thing(x)) "this" else "that" +}) + +# Seen in dplyr, don't autobrace (value position) +x <- x %||% if (condition) "this" else "that" + +# Seen in dplyr, don't autobrace (value position) +ntext <- function(n, msg1, msg2) { + if (n == 1) msg1 else msg2 +} + +# We want side-effects like `return()` inside if statements +# to expand (effect position) +fn <- function() { + if (condition) return(1) + 2 +} + +# We want side-effects like `stop()` inside if statements +# to expand (effect position) +fn <- function() { + if (condition) stop("oh no!") + 2 +} + +# We don't prevent value positioned if statements that are actually +# side effects from expanding, i.e. we don't try and fix up poor logic +fn <- function() { + if (condition) stop("oh no!") +} \ No newline at end of file diff --git a/crates/air_r_formatter/tests/specs/r/if_statement.R.snap b/crates/air_r_formatter/tests/specs/r/if_statement.R.snap index 87d7d16a..2b8962bc 100644 --- a/crates/air_r_formatter/tests/specs/r/if_statement.R.snap +++ b/crates/air_r_formatter/tests/specs/r/if_statement.R.snap @@ -5,21 +5,18 @@ info: r/if_statement.R # Input ```R -if (a) 1 -if (a) 1 else 2 -if (a) 1 else if (b) 2 else 3 - -# Spacing test -if(a)1 else if(b)2 else 3 - -# Line break test -if (a_really_really_long_condition_here_that_is_allowed_to_break_onto_the_next_line) 1 else 2 +# --------------------------------------------------------------------------- +# Comments if (a) # becomes leading on `1 + 1` { 1 + 1 } +if (a) { # becomes leading on `1 + 1` + 1 + 1 +} + if (a) # becomes dangling on `{}` { } @@ -32,6 +29,10 @@ if (a) # becomes dangling on `{}` if (a) # becomes leading on `TRUE` TRUE +if +# becomes leading on `a` +(a) TRUE + if ( a # becomes trailing on `a` @@ -44,6 +45,255 @@ if (a # becomes trailing on `a` TRUE } +{ + if (condition) this # becomes leading on `this` + else that +} + +{ + if (condition) this + # becomes leading on `that` + else that +} + +# With `{ this }`, it's nice that this becomes leading on `this` +{ + if (condition) { this } # becomes leading on `this` + else that +} + +# With `{\n this\n}`, it's arguable whether this should lead +# `this` or lead `that`, but we keep things simple here to have +# one code path that also handles the `{ this }` case. +{ + if (condition) { + this + } # becomes leading on `this` + else that +} + +{ + if (condition) { + + } # becomes dangling on `{}` + else that +} + +{ + if (condition) { + this + } + # becomes leading on `that` + else that +} + +{ + if (condition) { + this + } + # becomes leading on `that` + else { + that + } +} + +{ + if (condition) { + this + } + # becomes dangling on `{}` + else { + + } +} + +{ + if (condition) { + this + } + # becomes leading on `that` + else if (condition) { + that + } +} + +{ + if (condition) { + this + } + # becomes leading on `that` + else if (condition) that +} + +{ + if (condition) { + this + } + # becomes dangling on `{}` + else if (condition) {} +} + +{ + if (condition) { + this + } + # becomes leading on `that` + # becomes leading on `that` part 2 + else if (condition) { + that + } +} + +{ + if (condition) a + else # becomes leading on `b` + b +} + +{ + if (condition) a + else # becomes leading on `b` + { + b + } +} + +if (condition) { + a +} else if (condition) { + b +} else # becomes leading on `c` +{ + c +} + +if (condition) # becomes leading on `a` +{ + a +} else if (condition) # becomes leading on `b` +{ + b +} else # becomes leading on `c` +{ + c +} + +# In general, greatly prefer creating leading comments on `a` rather than +# creating trailing comments on `a`, as it is much easier to handle from +# an idempotence point of view. +{ + if (condition) { + if (condition) a + } # becomes leading on `a`'s if statement + else { + b + } +} + +{ + if (condition) a # becomes leading on `a` + else if (condition) b +} + +{ + if (condition) { a } # becomes leading on `a` + else if (condition) b +} + +{ + if (condition) { + a + } # becomes leading on `a` + else if (condition) b +} + +# --------------------------------------------------------------------------- +# Comments - these comments aren't "enclosed" by the if statement, so they +# our outside our if statement comment handling hooks. Avoid the urge to +# try and attach these comments to the last node in the if statement, as +# that causes verbatim printing (with `fmt: skip`) to break, because biome's +# formatter expects that the trailing comment trails the outermost code element +# (here, the whole if statement, not the last node of it). We have `fmt: skip` +# related test elsewhere to ensure we don't break this. + +# These stay where they are, short one liner if statements in value position +# don't expand +x <- if (condition) a # becomes trailing on if statement +x <- if (condition) a else b # becomes trailing on if statement + +# These expand from being at top level in effect position +if (condition) a # becomes trailing on if statement +if (condition) a else b # becomes trailing on if statement + +if (condition) { a } # becomes trailing on if statement +if (condition) { a } else { b } # becomes trailing on if statement + +if (condition) a_really_really_long_name_here_that_forces_a_break_because_it_is_so_long # becomes trailing on if statement +if (condition) { a_really_really_long_name_here_that_forces_a_break_because_it_is_so_long } # becomes trailing on if statement + +if (condition) a_really_really_long_name_here else another_really_really_long_name # becomes trailing on if statement +if (condition) a_really_really_long_name_here else { another_really_really_long_name } # becomes trailing on if statement + +if (condition) a_really_really_long_name_here else if (condition2) another_really_really_long_name # becomes trailing on outer if statement +if (condition) a_really_really_long_name_here else if (condition2) { another_really_really_long_name } # becomes trailing on outer if statement + +if (condition) a_really_really_long_name_here else if (condition2) another_really_really_long_name else that_name # becomes trailing on outer if statement +if (condition) a_really_really_long_name_here else if (condition2) another_really_really_long_name else { that_name } # becomes trailing on outer if statement + +if (condition) + a # becomes trailing on if statement + +if (condition) # becomes leading on `a` + a # becomes trailing on if statement + +if (condition) { + a +} # becomes trailing on if statement + +# It would be nice to have consistent behavior, but it's basically impossible. +# Comment on `a` is enclosed by the if statement so our hooks handle it, but +# comment on `b` is not! +{ + if (condition) a # becomes leading on `a` + else b # becomes trailing on if statement +} + +{ + if (condition) a + else b # becomes trailing on if statement +} + +{ + if (condition) a + else { + b + } # becomes trailing on if statement +} + +{ + if (condition) a + else b + # Floating comment after the if statement +} + +# Nesting in `consequence` +x <- if (condition) if (condition2) this # becomes trailing on if statement +x <- if (condition) if (condition2) if (condition3) this # becomes trailing on if statement +x <- if (condition) if (condition2) this else that # becomes trailing on if statement +if (condition) if (condition2) this # becomes trailing on if statement +if (condition) if (condition2) if (condition3) this # becomes trailing on if statement +if (condition) if (condition2) this else that # becomes trailing on if statement + +# Nesting in `alternative` +x <- if (condition) this else that # stays flat, one liner if statement in value position +if (condition) this else that # becomes trailing on if statement +if (condition) this else if (condition2) that # becomes trailing on if statement +if (condition) this else if (condition2) this2 else that # becomes trailing on if statement +if (condition) this else if (condition2) this2 else if (condition3) that # becomes trailing on if statement + +# --------------------------------------------------------------------------- +# Special + # Breaks, but the `condition` itself fits and is not expanded if (this || this || this || this || this || this || this || this || this || this) { 1 @@ -57,6 +307,118 @@ if (this || this || this || this || this || this || this || this || this || this 2 } +# --------------------------------------------------------------------------- +# Auto bracing + +# Not allowed on one line without braces (effect position) +if (a) 1 +if (a) 1 else 2 +{ + if (a) 1 + 1 + 1 +} + +# Allowed on one line without braces (value position) +x = if (a) 1 +x <- if (a) 1 +x <<- if (a) 1 +x = if (a) 1 else 2 +x <- if (a) 1 else 2 +x <<- if (a) 1 else 2 +{ + if (a) 1 +} + +# Allowed on one line without braces (value position) +fn(if (a) 1) +fn(if (a) 1, if (a) 1 else 2) +fn(x = if (a) 1) +fn(if (a) 1, x = if (a) 1 else 2) +# Too complex, forces braces +fn(if (a) 1 else if (b) 2 else 3) +# Breaks argument list, but not if/else +fn(if (a) 1, x = if (a) 1 else 2, this_is_really_really_long_and_breaks_the_group_here) + +# Allowed on one line without braces (value position) +function(x = if (a) 1) {} +function(x = if (a) 1, y = if (a) 1 else 2) {} +# Too complex, forces braces +function(x = if (a) 1 else if (b) 2 else 3) {} +# Breaks parameter list, but not if/else +function(x = if (a) 1, y = if (a) 1 else 2, this_is_really_really_long_and_breaks_the_group_here) {} + +# The group breaking forces braces +x <- if (something_really_really_long_here_something_really_really_long_here) 1 else 2 +x <- if (a) something_really_really_long_here else and_something_really_really_long_here + +# The leading newline forces braces +x <- if (a) + 1 +x <- if (a) 1 else + 2 + +# The leading newline before `else` forces braces +{ + if (a) 1 + else 2 +} + +# The nested if in `consequence` forces braces around the outer if +x <- if (a) if (b) 1 +x <- if (a) if (b) if (c) 1 +x <- if (a) if (b) 1 else 2 + +# We don't force the inner if inside the `consequence` to have braces as +# well, because a user could have writen a short if in the inner branch to +# begin with (like the second example here) and that would have been valid. +x <- if (a) if (b) 1 +x <- if (a) { + if (b) 1 +} + +# The nested if in `alternative` forces braces +x <- if (a) 1 else if (b) 2 +x <- if (a) 1 else if (b) 2 else 3 + +# The braces on one piece force braces +x <- if (a) {1} else 2 +x <- if (a) {1} else {2} + +# Seen in dplyr, don't autobrace (value position) +map(xs, function(x) if (is_thing(x)) "this" else "that") + +# Seen in dplyr, don't autobrace (value position) +map(xs, function(x) { + if (is_thing(x)) "this" else "that" +}) + +# Seen in dplyr, don't autobrace (value position) +x <- x %||% if (condition) "this" else "that" + +# Seen in dplyr, don't autobrace (value position) +ntext <- function(n, msg1, msg2) { + if (n == 1) msg1 else msg2 +} + +# We want side-effects like `return()` inside if statements +# to expand (effect position) +fn <- function() { + if (condition) return(1) + 2 +} + +# We want side-effects like `stop()` inside if statements +# to expand (effect position) +fn <- function() { + if (condition) stop("oh no!") + 2 +} + +# We don't prevent value positioned if statements that are actually +# side effects from expanding, i.e. we don't try and fix up poor logic +fn <- function() { + if (condition) stop("oh no!") +} ``` @@ -76,18 +438,13 @@ Skip: None ----- ```R -if (a) 1 -if (a) 1 else 2 -if (a) 1 else if (b) 2 else 3 - -# Spacing test -if (a) 1 else if (b) 2 else 3 +# --------------------------------------------------------------------------- +# Comments -# Line break test -if ( - a_really_really_long_condition_here_that_is_allowed_to_break_onto_the_next_line -) - 1 else 2 +if (a) { + # becomes leading on `1 + 1` + 1 + 1 +} if (a) { # becomes leading on `1 + 1` @@ -103,9 +460,17 @@ if (a) { # inner comment but empty `{}` } -if (a) +if (a) { # becomes leading on `TRUE` TRUE +} + +if ( + # becomes leading on `a` + a +) { + TRUE +} if ( a @@ -120,6 +485,374 @@ if ( TRUE } +{ + if (condition) { + # becomes leading on `this` + this + } else { + that + } +} + +{ + if (condition) { + this + } else { + # becomes leading on `that` + that + } +} + +# With `{ this }`, it's nice that this becomes leading on `this` +{ + if (condition) { + # becomes leading on `this` + this + } else { + that + } +} + +# With `{\n this\n}`, it's arguable whether this should lead +# `this` or lead `that`, but we keep things simple here to have +# one code path that also handles the `{ this }` case. +{ + if (condition) { + # becomes leading on `this` + this + } else { + that + } +} + +{ + if (condition) { + # becomes dangling on `{}` + } else { + that + } +} + +{ + if (condition) { + this + } else { + # becomes leading on `that` + that + } +} + +{ + if (condition) { + this + } else { + # becomes leading on `that` + that + } +} + +{ + if (condition) { + this + } else { + # becomes dangling on `{}` + } +} + +{ + if (condition) { + this + } else if (condition) { + # becomes leading on `that` + that + } +} + +{ + if (condition) { + this + } else if (condition) { + # becomes leading on `that` + that + } +} + +{ + if (condition) { + this + } else if (condition) { + # becomes dangling on `{}` + } +} + +{ + if (condition) { + this + } else if (condition) { + # becomes leading on `that` + # becomes leading on `that` part 2 + that + } +} + +{ + if (condition) { + a + } else { + # becomes leading on `b` + b + } +} + +{ + if (condition) { + a + } else { + # becomes leading on `b` + b + } +} + +if (condition) { + a +} else if (condition) { + b +} else { + # becomes leading on `c` + c +} + +if (condition) { + # becomes leading on `a` + a +} else if (condition) { + # becomes leading on `b` + b +} else { + # becomes leading on `c` + c +} + +# In general, greatly prefer creating leading comments on `a` rather than +# creating trailing comments on `a`, as it is much easier to handle from +# an idempotence point of view. +{ + if (condition) { + # becomes leading on `a`'s if statement + if (condition) a + } else { + b + } +} + +{ + if (condition) { + # becomes leading on `a` + a + } else if (condition) { + b + } +} + +{ + if (condition) { + # becomes leading on `a` + a + } else if (condition) { + b + } +} + +{ + if (condition) { + # becomes leading on `a` + a + } else if (condition) { + b + } +} + +# --------------------------------------------------------------------------- +# Comments - these comments aren't "enclosed" by the if statement, so they +# our outside our if statement comment handling hooks. Avoid the urge to +# try and attach these comments to the last node in the if statement, as +# that causes verbatim printing (with `fmt: skip`) to break, because biome's +# formatter expects that the trailing comment trails the outermost code element +# (here, the whole if statement, not the last node of it). We have `fmt: skip` +# related test elsewhere to ensure we don't break this. + +# These stay where they are, short one liner if statements in value position +# don't expand +x <- if (condition) a # becomes trailing on if statement +x <- if (condition) a else b # becomes trailing on if statement + +# These expand from being at top level in effect position +if (condition) { + a +} # becomes trailing on if statement +if (condition) { + a +} else { + b +} # becomes trailing on if statement + +if (condition) { + a +} # becomes trailing on if statement +if (condition) { + a +} else { + b +} # becomes trailing on if statement + +if (condition) { + a_really_really_long_name_here_that_forces_a_break_because_it_is_so_long +} # becomes trailing on if statement +if (condition) { + a_really_really_long_name_here_that_forces_a_break_because_it_is_so_long +} # becomes trailing on if statement + +if (condition) { + a_really_really_long_name_here +} else { + another_really_really_long_name +} # becomes trailing on if statement +if (condition) { + a_really_really_long_name_here +} else { + another_really_really_long_name +} # becomes trailing on if statement + +if (condition) { + a_really_really_long_name_here +} else if (condition2) { + another_really_really_long_name +} # becomes trailing on outer if statement +if (condition) { + a_really_really_long_name_here +} else if (condition2) { + another_really_really_long_name +} # becomes trailing on outer if statement + +if (condition) { + a_really_really_long_name_here +} else if (condition2) { + another_really_really_long_name +} else { + that_name +} # becomes trailing on outer if statement +if (condition) { + a_really_really_long_name_here +} else if (condition2) { + another_really_really_long_name +} else { + that_name +} # becomes trailing on outer if statement + +if (condition) { + a +} # becomes trailing on if statement + +if (condition) { + # becomes leading on `a` + a +} # becomes trailing on if statement + +if (condition) { + a +} # becomes trailing on if statement + +# It would be nice to have consistent behavior, but it's basically impossible. +# Comment on `a` is enclosed by the if statement so our hooks handle it, but +# comment on `b` is not! +{ + if (condition) { + # becomes leading on `a` + a + } else { + b + } # becomes trailing on if statement +} + +{ + if (condition) { + a + } else { + b + } # becomes trailing on if statement +} + +{ + if (condition) { + a + } else { + b + } # becomes trailing on if statement +} + +{ + if (condition) { + a + } else { + b + } + # Floating comment after the if statement +} + +# Nesting in `consequence` +x <- if (condition) { + if (condition2) this +} # becomes trailing on if statement +x <- if (condition) { + if (condition2) { + if (condition3) this + } +} # becomes trailing on if statement +x <- if (condition) { + if (condition2) this else that +} # becomes trailing on if statement +if (condition) { + if (condition2) this +} # becomes trailing on if statement +if (condition) { + if (condition2) { + if (condition3) this + } +} # becomes trailing on if statement +if (condition) { + if (condition2) this else that +} # becomes trailing on if statement + +# Nesting in `alternative` +x <- if (condition) this else that # stays flat, one liner if statement in value position +if (condition) { + this +} else { + that +} # becomes trailing on if statement +if (condition) { + this +} else if (condition2) { + that +} # becomes trailing on if statement +if (condition) { + this +} else if (condition2) { + this2 +} else { + that +} # becomes trailing on if statement +if (condition) { + this +} else if (condition2) { + this2 +} else if (condition3) { + that +} # becomes trailing on if statement + +# --------------------------------------------------------------------------- +# Special + # Breaks, but the `condition` itself fits and is not expanded if ( this || this || this || this || this || this || this || this || this || this @@ -147,9 +880,205 @@ if ( } else { 2 } + +# --------------------------------------------------------------------------- +# Auto bracing + +# Not allowed on one line without braces (effect position) +if (a) { + 1 +} +if (a) { + 1 +} else { + 2 +} +{ + if (a) { + 1 + } + 1 + 1 +} + +# Allowed on one line without braces (value position) +x = if (a) 1 +x <- if (a) 1 +x <<- if (a) 1 +x = if (a) 1 else 2 +x <- if (a) 1 else 2 +x <<- if (a) 1 else 2 +{ + if (a) 1 +} + +# Allowed on one line without braces (value position) +fn(if (a) 1) +fn(if (a) 1, if (a) 1 else 2) +fn(x = if (a) 1) +fn(if (a) 1, x = if (a) 1 else 2) +# Too complex, forces braces +fn( + if (a) { + 1 + } else if (b) { + 2 + } else { + 3 + } +) +# Breaks argument list, but not if/else +fn( + if (a) 1, + x = if (a) 1 else 2, + this_is_really_really_long_and_breaks_the_group_here +) + +# Allowed on one line without braces (value position) +function(x = if (a) 1) { +} +function(x = if (a) 1, y = if (a) 1 else 2) { +} +# Too complex, forces braces +function( + x = if (a) { + 1 + } else if (b) { + 2 + } else { + 3 + } +) { +} +# Breaks parameter list, but not if/else +function( + x = if (a) 1, + y = if (a) 1 else 2, + this_is_really_really_long_and_breaks_the_group_here +) { +} + +# The group breaking forces braces +x <- if (something_really_really_long_here_something_really_really_long_here) { + 1 +} else { + 2 +} +x <- if (a) { + something_really_really_long_here +} else { + and_something_really_really_long_here +} + +# The leading newline forces braces +x <- if (a) { + 1 +} +x <- if (a) { + 1 +} else { + 2 +} + +# The leading newline before `else` forces braces +{ + if (a) { + 1 + } else { + 2 + } +} + +# The nested if in `consequence` forces braces around the outer if +x <- if (a) { + if (b) 1 +} +x <- if (a) { + if (b) { + if (c) 1 + } +} +x <- if (a) { + if (b) 1 else 2 +} + +# We don't force the inner if inside the `consequence` to have braces as +# well, because a user could have writen a short if in the inner branch to +# begin with (like the second example here) and that would have been valid. +x <- if (a) { + if (b) 1 +} +x <- if (a) { + if (b) 1 +} + +# The nested if in `alternative` forces braces +x <- if (a) { + 1 +} else if (b) { + 2 +} +x <- if (a) { + 1 +} else if (b) { + 2 +} else { + 3 +} + +# The braces on one piece force braces +x <- if (a) { + 1 +} else { + 2 +} +x <- if (a) { + 1 +} else { + 2 +} + +# Seen in dplyr, don't autobrace (value position) +map(xs, function(x) if (is_thing(x)) "this" else "that") + +# Seen in dplyr, don't autobrace (value position) +map(xs, function(x) { + if (is_thing(x)) "this" else "that" +}) + +# Seen in dplyr, don't autobrace (value position) +x <- x %||% if (condition) "this" else "that" + +# Seen in dplyr, don't autobrace (value position) +ntext <- function(n, msg1, msg2) { + if (n == 1) msg1 else msg2 +} + +# We want side-effects like `return()` inside if statements +# to expand (effect position) +fn <- function() { + if (condition) { + return(1) + } + 2 +} + +# We want side-effects like `stop()` inside if statements +# to expand (effect position) +fn <- function() { + if (condition) { + stop("oh no!") + } + 2 +} + +# We don't prevent value positioned if statements that are actually +# side effects from expanding, i.e. we don't try and fix up poor logic +fn <- function() { + if (condition) stop("oh no!") +} ``` # Lines exceeding max width of 80 characters ``` - 10: a_really_really_long_condition_here_that_is_allowed_to_break_onto_the_next_line + 387: x <- if (condition) this else that # stays flat, one liner if statement in value position ``` diff --git a/crates/air_r_formatter/tests/specs/r/repeat_statement.R b/crates/air_r_formatter/tests/specs/r/repeat_statement.R index c36e98db..d1d0e8cf 100644 --- a/crates/air_r_formatter/tests/specs/r/repeat_statement.R +++ b/crates/air_r_formatter/tests/specs/r/repeat_statement.R @@ -1,41 +1,59 @@ -repeat 1 - repeat {} -repeat { # a comment +repeat { + a + b +} + +repeat +{ + a + b } -repeat { # comment1 - # comment2 +# ------------------------------------------------------------------------------ +# Autobracing + +repeat 1 + +repeat 1 + 1 + +# ------------------------------------------------------------------------------ +# Comments + +repeat { # dangles {} } -repeat # comment1 +# These should be consistent +repeat { # leads a + # leads a 2 + a +} +repeat # leads a { - # comment2 - 1 + 1 + # leads a 2 + a } -repeat # comment1 +repeat # dangles {} {} -repeat # comment1 +repeat # dangles {} { - # comment2 + # dangles {} 2 } repeat -# comment1 +# leads a { - 1 + 1 + a } -# comment1 +# leads repeat repeat { - # comment2 - 1 + 1 + # leads a + a } -repeat # comment1 - 1 +repeat # leads a + a diff --git a/crates/air_r_formatter/tests/specs/r/repeat_statement.R.snap b/crates/air_r_formatter/tests/specs/r/repeat_statement.R.snap index 672a943d..4bf4e65a 100644 --- a/crates/air_r_formatter/tests/specs/r/repeat_statement.R.snap +++ b/crates/air_r_formatter/tests/specs/r/repeat_statement.R.snap @@ -5,47 +5,65 @@ info: r/repeat_statement.R # Input ```R -repeat 1 - repeat {} -repeat { # a comment +repeat { + a + b +} + +repeat +{ + a + b } -repeat { # comment1 - # comment2 +# ------------------------------------------------------------------------------ +# Autobracing + +repeat 1 + +repeat 1 + 1 + +# ------------------------------------------------------------------------------ +# Comments + +repeat { # dangles {} } -repeat # comment1 +# These should be consistent +repeat { # leads a + # leads a 2 + a +} +repeat # leads a { - # comment2 - 1 + 1 + # leads a 2 + a } -repeat # comment1 +repeat # dangles {} {} -repeat # comment1 +repeat # dangles {} { - # comment2 + # dangles {} 2 } repeat -# comment1 +# leads a { - 1 + 1 + a } -# comment1 +# leads repeat repeat { - # comment2 - 1 + 1 + # leads a + a } -repeat # comment1 - 1 +repeat # leads a + a ``` @@ -66,48 +84,69 @@ Skip: None ----- ```R -repeat 1 +repeat { +} repeat { + a + b } repeat { - # a comment + a + b } +# ------------------------------------------------------------------------------ +# Autobracing + repeat { - # comment1 - # comment2 - 1 + 1 + 1 } repeat { - # comment1 - # comment2 1 + 1 } +# ------------------------------------------------------------------------------ +# Comments + repeat { - # comment1 + # dangles {} } +# These should be consistent repeat { - # comment1 - # comment2 + # leads a + # leads a 2 + a +} +repeat { + # leads a + # leads a 2 + a } repeat { - # comment1 - 1 + 1 + # dangles {} } -# comment1 repeat { - # comment2 - 1 + 1 + # dangles {} + # dangles {} 2 } -repeat - # comment1 - 1 +repeat { + # leads a + a +} + +# leads repeat +repeat { + # leads a + a +} + +repeat { + # leads a + a +} ``` diff --git a/crates/air_r_formatter/tests/specs/r/while_statement.R b/crates/air_r_formatter/tests/specs/r/while_statement.R index 355c906d..91fab1c5 100644 --- a/crates/air_r_formatter/tests/specs/r/while_statement.R +++ b/crates/air_r_formatter/tests/specs/r/while_statement.R @@ -1,13 +1,9 @@ -while(a)a - while(a){} while(a) { 1 + 1 } -# TODO: Not entirely sure how this should be formatted. -# It's not very common though. while({ complex }) { 1 + 1 } @@ -16,30 +12,75 @@ while(super_long_function_name_is_true_man_this_is_a_really_really_long_function 1 + 1 } -while( - # comment - a) { - 1 + 1 +# ------------------------------------------------------------------------------ +# Autobracing + +while(a)a + +while(a) + a + +# ------------------------------------------------------------------------------ +# Comments + +while # leads while +(a) { + b +} + +while +# leads while +(a) { + b +} + +while(a # leads while +) { + b } while(a -# comment +# leads while ) { - 1 + 1 + b } -while(a # comment +while( # leads while + a) { + b +} + +while( + # leads while + a) { + b +} + +while( + a + # leads while ) { - 1 + 1 + b +} + +while(a) # leads b +{ + b +} + +while(a) +# leads b +{ + b } -while(a) # comment +while(a) # dangles {} {} -while(a) # comment -1 +while(a) # leads b +b -while(a) # comment1 +while(a) # dangles {} { - # comment2 + # dangles {} 2 } diff --git a/crates/air_r_formatter/tests/specs/r/while_statement.R.snap b/crates/air_r_formatter/tests/specs/r/while_statement.R.snap index 335f4a8f..4f6c0dd3 100644 --- a/crates/air_r_formatter/tests/specs/r/while_statement.R.snap +++ b/crates/air_r_formatter/tests/specs/r/while_statement.R.snap @@ -5,16 +5,12 @@ info: r/while_statement.R # Input ```R -while(a)a - while(a){} while(a) { 1 + 1 } -# TODO: Not entirely sure how this should be formatted. -# It's not very common though. while({ complex }) { 1 + 1 } @@ -23,32 +19,77 @@ while(super_long_function_name_is_true_man_this_is_a_really_really_long_function 1 + 1 } -while( - # comment - a) { - 1 + 1 +# ------------------------------------------------------------------------------ +# Autobracing + +while(a)a + +while(a) + a + +# ------------------------------------------------------------------------------ +# Comments + +while # leads while +(a) { + b +} + +while +# leads while +(a) { + b +} + +while(a # leads while +) { + b } while(a -# comment +# leads while ) { - 1 + 1 + b } -while(a # comment +while( # leads while + a) { + b +} + +while( + # leads while + a) { + b +} + +while( + a + # leads while ) { - 1 + 1 + b +} + +while(a) # leads b +{ + b +} + +while(a) +# leads b +{ + b } -while(a) # comment +while(a) # dangles {} {} -while(a) # comment -1 +while(a) # leads b +b -while(a) # comment1 +while(a) # dangles {} { - # comment2 + # dangles {} 2 } ``` @@ -70,8 +111,6 @@ Skip: None ----- ```R -while (a) a - while (a) { } @@ -79,8 +118,6 @@ while (a) { 1 + 1 } -# TODO: Not entirely sure how this should be formatted. -# It's not very common though. while ( { complex @@ -95,36 +132,76 @@ while ( 1 + 1 } -while ( - # comment +# ------------------------------------------------------------------------------ +# Autobracing + +while (a) { a -) { - 1 + 1 } -while ( +while (a) { a - # comment -) { - 1 + 1 } -while ( - a # comment -) { - 1 + 1 +# ------------------------------------------------------------------------------ +# Comments + +# leads while +while (a) { + b } +# leads while while (a) { - # comment + b } -while (a) - # comment - 1 +# leads while +while (a) { + b +} + +# leads while +while (a) { + b +} + +# leads while +while (a) { + b +} + +# leads while +while (a) { + b +} + +# leads while +while (a) { + b +} + +while (a) { + # leads b + b +} + +while (a) { + # leads b + b +} + +while (a) { + # dangles {} +} + +while (a) { + # leads b + b +} while (a) { - # comment1 - # comment2 + # dangles {} + # dangles {} 2 } ``` diff --git a/docs/formatter.qmd b/docs/formatter.qmd index 84828428..7fd3f589 100644 --- a/docs/formatter.qmd +++ b/docs/formatter.qmd @@ -174,6 +174,321 @@ list(foo, bar) The goal of this feature is to strike a balance between being opinionated and recognizing that users often know when taking up more vertical space results in more readable output. +# Autobracing + +To encourage more consistent, readable, and portable code, Air will *autobrace* the following elements: + +- If statements + +- For, while, and repeat loops + +- Function definitions + +Autobracing is the process of wrapping the body of these code elements with `{ }` if braces don't already exist. + +## If statements + +Air will autobrace if statements if: + +- Any existing part of the if statement spans multiple lines + +- Any existing part of the if statement is already braced + +- The if statement is nested, i.e. there is an `else if {` + +- The if statement exceeds the line length + +For example, the following will all be autobraced: + +``` r +if (condition) + a + +# Becomes: +if (condition) { + a +} +``` + +``` r +if (condition) a else { b } + +# Becomes: +if (condition) { + a +} else { + b +} +``` + +``` r +if (condition) a else if (condition2) b else c + +# Becomes: +if (condition) { + a +} else if (condition2) { + b +} else { + c +} +``` + +Simple if statements that don't hit any of the autobracing criteria mentioned above are allowed to stay on one line as long as they are also in *value* position, as opposed to *effect* position. + +- Top level if statements are in effect position. + +- If statements that are direct children of `{}` are in effect position, unless the if statement is the last child of the `{}` expression list, in which case it is in value position (because it is the returned value from that scope). + +- Otherwise, the if statement is in value position. + +This if statement is at top level, putting it in effect position, and would be autobraced: + +``` r +if (condition) a else b + +# Becomes: +if (condition) { + a +} else { + b +} +``` + +These if statements are children of `{}` (but aren't the last child!), putting them in effect position, and would be autobraced: + +``` r +fn <- function(x) { + if (condition) stop("oh no") + if (condition) return(1) + if (condition) x <- 1 + x + 1 +} + +# Becomes: +fn <- function(x) { + if (condition) { + stop("oh no") + } + if (condition) { + return(1) + } + if (condition) { + x <- 1 + } + x + 1 +} +``` + +It's particularly important for code involving `stop()`, `return()`, and `<-` to be easily readable on their own line because they cause *side effects* that affect control flow or state. + +These if statements are in value position, and would not be autobraced: + +``` r +x <- if (condition) 1 else 2 + +x <- x %||% if (condition) 1 else 2 + +list(a = if (condition) 1 else 2) + +function( + a, + optional = if (is.null(a)) 1 else 2 +) { +} + +# If statement is the last expression of the `{}` scope +map(xs, function(x) { + if (is.null(x)) 1 else 2 +}) +``` + +### Portability + +It is particularly important to autobrace multiline if statements for *portability*, which is the ability to copy and paste that if statement into any context and have it still parse. +Consider the following if statement: + +``` r +fn <- function(a) { + if (is.null(a)) + 1 + else + 2 +} +``` + +This parses and runs correctly while the if statement is nested within the `{}` braces of the function. +But if you're testing this code and you copy and paste it out of the function, then it no longer parses: + +``` r +if (is.null(a)) + 1 +else + 2 +``` + +If you try and run this, then you'll see an error like `Error: unexpected 'else'`. +This is particularly annoying when you're working inside a debugger. +Most R debuggers allow you to pause inside functions and highlight and run chunks of that function. +If you're paused inside `fn()` and try to highlight and run the if statement, then it will confusingly fail to parse. +Autobracing multiline if statements avoids this problem entirely. + +## For, while, and repeat loops + +Air unconditionally autobraces the body of all R loops. +This is mostly for consistency with if statements, as it is fairly uncommon to see braceless loops in practice. + +``` r +for (i in 1:5) x <- x + i + +# Becomes: +for (i in 1:5) { + x <- x + i +} +``` + +``` r +while (x < 5) x <- x + 1 + +# Becomes: +while (x < 5) { + x <- x + 1 +} +``` + +## Function definitions + +Air will autobrace the body of a function definition if: + +- Any existing part of the function definition spans multiple lines + +- The function definition exceeds the line length + +``` r +fn <- function(a, b) + a + b + +# Becomes: +fn <- function(a, b) { + a + b +} +``` + +``` r +fn <- function( + a, + b +) a + b + +# Becomes: +fn <- function( + a, + b +) { + a + b +} +``` + +``` r +fn <- function(a_really_long_variable_name, another_really_long_name) a_really_long_variable_name + another_really_long_name + +# Becomes: +fn <- function( + a_really_long_variable_name, + another_really_long_name +) { + a_really_long_variable_name + another_really_long_name +} +``` + +Short function definitions are allowed on one line and will not be autobraced. +These are all allowed by Air: + +``` r +add_one <- function(x) x + 1 + +map_lgl(xs, function(x) is.logical(x) && length(x) == 1L && !is.na(x)) + +# This includes anonymous functions +map_lgl(xs, \(x) is.list(x) && length(x) == 0L) +``` + +## With persistent line breaks + +Autobracing is particularly useful as a code rewriting tool when combined with persistent line breaks. +Consider: + +``` r +result <- map_lgl(xs, function(x) is.logical(x) && length(x) == 1L && !is.na(x)) +``` + +This may be easier to read if it spanned across multiple lines. +You could manually rework this, or you could let Air help you! +There are two places you could put a persistent line break depending on what your desired final result is: + +``` r +# Adding a line break before `xs` expands the call +result <- map_lgl( + xs, function(x) is.logical(x) && length(x) == 1L && !is.na(x)) + +# Becomes: +result <- map_lgl( + xs, + function(x) is.logical(x) && length(x) == 1L && !is.na(x) +) +``` + +``` r +# Adding a line break before `is.logical(x)` forces autobracing +result <- map_lgl(xs, function(x) + is.logical(x) && length(x) == 1L && !is.na(x)) + +# Becomes: +result <- map_lgl(xs, function(x) { + is.logical(x) && length(x) == 1L && !is.na(x) +}) +``` + +## Comments + +Air generally avoids moving your comments. +However, when Air autobraces code, it may have to adjust them. +This generally works quite well for most code, but is impossible to do perfectly. +It is possible that you will have to adjust the placement of your comments after Air runs. + +For example, leading comments on autobraced elements are generally placed in a way that you'd expect: + +``` r +if (condition) + # My comment + a + +# Becomes: +if (condition) { + # My comment + a +} +``` + +But trailing comments might need manual adjustment: + +``` r +if (condition) + a # My comment + +# Becomes: +if (condition) { + a +} # My comment + +# You may want to adjust it to: +if (condition) { + a # My comment +} +``` + +In general, prefer leading comments over trailing comments for readability and to have the highest chance of Air placing it in the correct location when comment adjustment is required. + # Disabling formatting ## Skip comments