From f3c5b63095519d83acfea75dfdebca1639e82055 Mon Sep 17 00:00:00 2001
From: Charlie Marsh
Date: Fri, 20 Dec 2024 13:52:42 -0500
Subject: [PATCH] Avoid duplicating backslashes in sysconfig parser (#10063)

## Summary

We had a bug in our handling of escape sequences that caused us to duplicate backslashes. If you installed repeatedly, we'd keep doubling them, leading to an exponential blowup.

Closes #10060.
---
 crates/uv-python/src/sysconfig/parser.rs | 86 ++++++++++++++++++++----
 1 file changed, 74 insertions(+), 12 deletions(-)

diff --git a/crates/uv-python/src/sysconfig/parser.rs b/crates/uv-python/src/sysconfig/parser.rs
index 427bf327af1c..6620459de91a 100644
--- a/crates/uv-python/src/sysconfig/parser.rs
+++ b/crates/uv-python/src/sysconfig/parser.rs
@@ -166,18 +166,20 @@ fn parse_string(cursor: &mut Cursor, quote: char) -> Result {
         };
         match c {
             '\\' => {
-                // Handle escaped quotes.
-                if cursor.first() == quote {
-                    // Consume the backslash.
-                    cursor.bump();
-                    result.push(quote);
-                    continue;
-                }
-
-                // Keep the backslash and following character.
-                result.push('\\');
-                result.push(cursor.first());
-                cursor.bump();
+                // Treat the next character as a literal.
+                //
+                // See: https://github.com/astral-sh/ruff/blob/d47fba1e4aeeb18085900dfbbcd187e90d536913/crates/ruff_python_parser/src/string.rs#L194
+                let Some(c) = cursor.bump() else {
+                    return Err(Error::UnexpectedEof);
+                };
+                result.push(match c {
+                    '\\' => '\\',
+                    '\'' => '\'',
+                    '\"' => '"',
+                    _ => {
+                        return Err(Error::UnrecognizedEscape(c));
+                    }
+                });
             }
 
             // Consume closing quote.
@@ -255,6 +257,8 @@ pub enum Error {
     UnexpectedCharacter(char),
     #[error("Unexpected end of file")]
     UnexpectedEof,
+    #[error("Unrecognized escape sequence: {0}")]
+    UnrecognizedEscape(char),
     #[error("Failed to parse integer")]
     ParseInt(#[from] std::num::ParseIntError),
     #[error("`_sysconfigdata_` is missing a header comment")]
@@ -293,6 +297,64 @@ mod tests {
         "###);
     }
 
+    #[test]
+    fn test_parse_backslash() {
+        let input = indoc::indoc!(
+            r#"
+            # system configuration generated and used by the sysconfig module
+            build_time_vars = {
+                "key1": "value1\"value2\"value3",
+                "key2": "value1\\value2\\value3",
+                "key3": "value1\\\"value2\\\"value3",
+                "key4": "value1\\\\value2\\\\value3",
+            }
+        "#
+        );
+
+        let result = input.parse::().expect("Parsing failed");
+        let snapshot = result.to_string_pretty().unwrap();
+
+        insta::assert_snapshot!(snapshot, @r###"
+        # system configuration generated and used by the sysconfig module
+        build_time_vars = {
+            "key1": "value1\"value2\"value3",
+            "key2": "value1\\value2\\value3",
+            "key3": "value1\\\"value2\\\"value3",
+            "key4": "value1\\\\value2\\\\value3"
+        }
+        "###);
+    }
+
+    #[test]
+    fn test_parse_trailing_backslash() {
+        let input = indoc::indoc!(
+            r#"
+            # system configuration generated and used by the sysconfig module
+            build_time_vars = {
+                "key1": "value1\\value2\\value3\",
+            }
+        "#
+        );
+
+        let result = input.parse::();
+        assert!(matches!(result, Err(Error::UnexpectedEof)));
+    }
+
+    #[test]
+    fn test_parse_unrecognized_escape() {
+        let input = indoc::indoc!(
+            r#"
+            # system configuration generated and used by the sysconfig module
+            build_time_vars = {
+                "key1": "value1\value2",
+            }
+        "#
+        );
+
+        let result = input.parse::();
+        assert!(matches!(result, Err(Error::UnrecognizedEscape('v'))));
+    }
+
     #[test]
     fn test_parse_trailing_comma() {
         let input = indoc::indoc!(