diff --git a/src/cargo/util/toml/embedded.rs b/src/cargo/util/toml/embedded.rs index 2257d2b8b7d..481265d2b12 100644 --- a/src/cargo/util/toml/embedded.rs +++ b/src/cargo/util/toml/embedded.rs @@ -140,44 +140,28 @@ impl<'s> ScriptSource<'s> { content: input, }; - // See rust-lang/rust's compiler/rustc_lexer/src/lib.rs's `strip_shebang` - // Shebang must start with `#!` literally, without any preceding whitespace. - // For simplicity we consider any line starting with `#!` a shebang, - // regardless of restrictions put on shebangs by specific platforms. - if let Some(rest) = source.content.strip_prefix("#!") { - // Ok, this is a shebang but if the next non-whitespace token is `[`, - // then it may be valid Rust code, so consider it Rust code. - // - // NOTE: rustc considers line and block comments to be whitespace but to avoid - // any more awareness of Rust grammar, we are excluding it. - if rest.trim_start().starts_with('[') { - return Ok(source); - } - - // No other choice than to consider this a shebang. 
- let newline_end = source - .content - .find('\n') - .map(|pos| pos + 1) - .unwrap_or(source.content.len()); - let (shebang, content) = source.content.split_at(newline_end); + if let Some(shebang_end) = strip_shebang(source.content) { + let (shebang, content) = source.content.split_at(shebang_end); source.shebang = Some(shebang); source.content = content; } const FENCE_CHAR: char = '-'; - let mut trimmed_content = source.content; - while !trimmed_content.is_empty() { - let c = trimmed_content; - let c = c.trim_start_matches([' ', '\t']); - let c = c.trim_start_matches(['\r', '\n']); - if c == trimmed_content { + let mut rest = source.content; + while !rest.is_empty() { + let without_spaces = rest.trim_start_matches([' ', '\t']); + let without_nl = without_spaces.trim_start_matches(['\r', '\n']); + if without_nl == rest { + // nothing trimmed break; + } else if without_nl == without_spaces { + // frontmatter must come after a newline + return Ok(source); } - trimmed_content = c; + rest = without_nl; } - let fence_end = trimmed_content + let fence_end = rest .char_indices() .find_map(|(i, c)| (c != FENCE_CHAR).then_some(i)) .unwrap_or(source.content.len()); @@ -190,8 +174,9 @@ impl<'s> ScriptSource<'s> { "found {fence_end} `{FENCE_CHAR}` in rust frontmatter, expected at least 3" ) } - _ => trimmed_content.split_at(fence_end), + _ => rest.split_at(fence_end), }; + let nl_fence_pattern = format!("\n{fence_pattern}"); let (info, content) = rest.split_once("\n").unwrap_or((rest, "")); let info = info.trim(); if !info.is_empty() { @@ -199,11 +184,11 @@ impl<'s> ScriptSource<'s> { } source.content = content; - let Some((frontmatter, content)) = source.content.split_once(fence_pattern) else { + let Some(frontmatter_nl) = source.content.find(&nl_fence_pattern) else { anyhow::bail!("no closing `{fence_pattern}` found for frontmatter"); }; - source.frontmatter = Some(frontmatter); - source.content = content; + source.frontmatter = Some(&source.content[..frontmatter_nl + 1]); 
/// Returns the byte length of a leading shebang line in `input`, if any.
///
/// The returned offset includes the terminating `\n` when one is present;
/// when the shebang line is not newline-terminated the offset is
/// `input.len()`. Splitting `input` at the offset yields `(shebang, rest)`.
///
/// Returns `None` when `input` does not start with `#!`, or when the first
/// non-whitespace character after `#!` is `[` (for example `#![feature(..)]`),
/// because that form is treated as Rust code rather than a shebang.
fn strip_shebang(input: &str) -> Option<usize> {
    // See rust-lang/rust's compiler/rustc_lexer/src/lib.rs's `strip_shebang`
    // Shebang must start with `#!` literally, without any preceding whitespace.
    // For simplicity we consider any line starting with `#!` a shebang,
    // regardless of restrictions put on shebangs by specific platforms.
    let rest = input.strip_prefix("#!")?;

    // Ok, this is a shebang but if the next non-whitespace token is `[`,
    // then it may be valid Rust code, so consider it Rust code.
    //
    // NOTE: rustc considers line and block comments to be whitespace but to avoid
    // any more awareness of Rust grammar, we are excluding it.
    if rest.trim_start().starts_with('[') {
        return None;
    }

    // No other choice than to consider this a shebang: it runs through the
    // first newline (inclusive), or to the end of input when there is none.
    Some(input.find('\n').map_or(input.len(), |pos| pos + 1))
}
split_dashes_in_body() { + assert_source( + r#"#!/usr/bin/env cargo +--- +Hello--- +World +--- + +fn main() {} +"#, + str![[r##" +shebang: "#!/usr/bin/env cargo\n" +info: None +frontmatter: "Hello---\nWorld\n" +content: "\nfn main() {}\n" + +"##]], + ); + } + #[test] fn split_mismatched_dashes() { assert_err(