From 0d04e28fc43b0e7dd4052532dff6488bd80c4942 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Thu, 2 Jan 2025 08:34:54 +0200 Subject: [PATCH 1/2] Fix formatting of control characters \x1a through \x1f Due to an incorrect range, control characters \x1a through \x1f get formatted as Unicode \u escapes rather than hex \x escapes. Fix the range, and add a test. Without the fix, the test fails like this: ``` left: "\"\\0\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x11\\x12\\r\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\\x1f \\x7f\\x80\\x81\\xfe\\xff\"" right: "\"\\0\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x11\\x12\\r\\x14\\x15\\x16\\x17\\x18\\x19\\u{1a}\\u{1b}\\u{1c}\\u{1d}\\u{1e}\\u{1f} \\x7f\\x80\\x81\\xfe\\xff\"" ``` --- src/impls.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/impls.rs b/src/impls.rs index 1c9614d..c0587bc 100644 --- a/src/impls.rs +++ b/src/impls.rs @@ -539,7 +539,7 @@ mod bstr { '\x01'..='\x08' | '\x0b' | '\x0c' - | '\x0e'..='\x19' + | '\x0e'..='\x1f' | '\x7f' => { write!(f, "\\x{:02x}", ch as u32)?; } @@ -1305,7 +1305,12 @@ fn test_debug() { // Before fixing #188, the output here would be: // \\xED\\xA0\\x80Aa\\x7f\\x0b B(&format!("{:?}", b"\xed\xa0\x80Aa\x7f\x0b".as_bstr())).as_bstr(), - ) + ); + + assert_eq!( + r#""\0\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x11\x12\r\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f \x7f\x80\x81\xfe\xff""#, + format!("{:?}", b"\0\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x11\x12\r\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f \x7f\x80\x81\xfe\xff".as_bstr()), + ); } // See: https://github.com/BurntSushi/bstr/issues/82 From bbeda2be2e61c2a1412afe19fb41f7ae3a029dfa Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Thu, 2 Jan 2025 08:44:51 +0200 Subject: [PATCH 2/2] Simplify escaping logic using escape_ascii Replace manual ranges of characters to escape with a call to `escape_ascii` for anything in the ASCII range. --- src/impls.rs | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/src/impls.rs b/src/impls.rs index c0587bc..de25392 100644 --- a/src/impls.rs +++ b/src/impls.rs @@ -525,6 +525,9 @@ mod bstr { for (s, e, ch) in self.char_indices() { match ch { '\0' => write!(f, "\\0")?, + '\x01'..='\x7f' => { + write!(f, "{}", (ch as u8).escape_ascii())?; + } '\u{FFFD}' => { let bytes = self[s..e].as_bytes(); if bytes == b"\xEF\xBF\xBD" { @@ -535,17 +538,6 @@ mod bstr { } } } - // ASCII control characters except \0, \n, \r, \t - '\x01'..='\x08' - | '\x0b' - | '\x0c' - | '\x0e'..='\x1f' - | '\x7f' => { - write!(f, "\\x{:02x}", ch as u32)?; - } - '\n' | '\r' | '\t' => { - write!(f, "{}", ch.escape_debug())?; - } _ => { write!(f, "{}", ch.escape_debug())?; }