From 8b94af3639168149424c5335db93f2c31e5ef84d Mon Sep 17 00:00:00 2001 From: Markus Reiter Date: Mon, 20 May 2024 20:33:56 +0200 Subject: [PATCH 1/5] Optimize `escape_ascii`. --- library/core/src/escape.rs | 51 +++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/library/core/src/escape.rs b/library/core/src/escape.rs index f6ec30b9f793a..628ac820e3fe0 100644 --- a/library/core/src/escape.rs +++ b/library/core/src/escape.rs @@ -24,32 +24,31 @@ const fn backslash<const N: usize>(a: ascii::Char) -> ([ascii::Char; N], Range<u const fn escape_ascii<const N: usize>(byte: u8) -> ([ascii::Char; N], Range<u8>) { const { assert!(N >= 4) }; - match byte { - b'\t' => backslash(ascii::Char::SmallT), - b'\r' => backslash(ascii::Char::SmallR), - b'\n' => backslash(ascii::Char::SmallN), - b'\\' => backslash(ascii::Char::ReverseSolidus), - b'\'' => backslash(ascii::Char::Apostrophe), - b'\"' => backslash(ascii::Char::QuotationMark), - byte => { - let mut output = [ascii::Char::Null; N]; - - if let Some(c) = byte.as_ascii() - && !byte.is_ascii_control() - { - output[0] = c; - (output, 0..1) - } else { - let hi = HEX_DIGITS[(byte >> 4) as usize]; - let lo = HEX_DIGITS[(byte & 0xf) as usize]; - - output[0] = ascii::Char::ReverseSolidus; - output[1] = ascii::Char::SmallX; - output[2] = hi; - output[3] = lo; - - (output, 0..4) - } + let mut output = [ascii::Char::Null; N]; + + match byte.as_ascii() { + Some(ascii::Char::CharacterTabulation) => backslash(ascii::Char::SmallT), + Some(ascii::Char::CarriageReturn) => backslash(ascii::Char::SmallR), + Some(ascii::Char::LineFeed) => backslash(ascii::Char::SmallN), + Some( + c @ ascii::Char::ReverseSolidus + | c @ ascii::Char::Apostrophe + | c @ ascii::Char::QuotationMark, + ) => backslash(c), + Some(c) if !byte.is_ascii_control() => { + output[0] = c; + (output, 0..1) + } + _ => { + let hi = HEX_DIGITS[(byte >> 4) as usize]; + let lo = HEX_DIGITS[(byte & 0xf) as usize]; + + output[0] = ascii::Char::ReverseSolidus; + output[1] = ascii::Char::SmallX; + 
output[2] = hi; + output[3] = lo; + + (output, 0..4) } } } From c9c3f917531a3a6feb9820d5fff60bac808b99cf Mon Sep 17 00:00:00 2001 From: Markus Reiter Date: Wed, 22 May 2024 12:35:09 +0200 Subject: [PATCH 2/5] Order `escape_ascii` by character frequency. --- library/core/src/escape.rs | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/library/core/src/escape.rs b/library/core/src/escape.rs index 628ac820e3fe0..b3652a1cabc6c 100644 --- a/library/core/src/escape.rs +++ b/library/core/src/escape.rs @@ -26,19 +26,21 @@ const fn escape_ascii<const N: usize>(byte: u8) -> ([ascii::Char; N], Range<u8>) let mut output = [ascii::Char::Null; N]; + // NOTE: This `match` is roughly ordered by the frequency of ASCII + // characters for performance. match byte.as_ascii() { - Some(ascii::Char::CharacterTabulation) => backslash(ascii::Char::SmallT), - Some(ascii::Char::CarriageReturn) => backslash(ascii::Char::SmallR), - Some(ascii::Char::LineFeed) => backslash(ascii::Char::SmallN), - Some( - c @ ascii::Char::ReverseSolidus - | c @ ascii::Char::Apostrophe - | c @ ascii::Char::QuotationMark, - ) => backslash(c), Some(c) if !byte.is_ascii_control() => { output[0] = c; (output, 0..1) } + Some(ascii::Char::LineFeed) => backslash(ascii::Char::SmallN), + Some(ascii::Char::CarriageReturn) => backslash(ascii::Char::SmallR), + Some(ascii::Char::CharacterTabulation) => backslash(ascii::Char::SmallT), + Some( + c @ ascii::Char::QuotationMark + | c @ ascii::Char::Apostrophe + | c @ ascii::Char::ReverseSolidus, + ) => backslash(c), _ => { let hi = HEX_DIGITS[(byte >> 4) as usize]; let lo = HEX_DIGITS[(byte & 0xf) as usize]; From 5606b0231a6f748a3d5eb4cef4d3419cf2b500e1 Mon Sep 17 00:00:00 2001 From: Markus Reiter Date: Wed, 22 May 2024 13:01:01 +0200 Subject: [PATCH 3/5] Fix match ordering. 
--- library/core/src/escape.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/library/core/src/escape.rs b/library/core/src/escape.rs index b3652a1cabc6c..3c09f5fdc20b5 100644 --- a/library/core/src/escape.rs +++ b/library/core/src/escape.rs @@ -29,6 +29,11 @@ const fn escape_ascii<const N: usize>(byte: u8) -> ([ascii::Char; N], Range<u8>) // NOTE: This `match` is roughly ordered by the frequency of ASCII // characters for performance. match byte.as_ascii() { + Some( + c @ ascii::Char::QuotationMark + | c @ ascii::Char::Apostrophe + | c @ ascii::Char::ReverseSolidus, + ) => backslash(c), Some(c) if !byte.is_ascii_control() => { output[0] = c; (output, 0..1) @@ -36,11 +41,6 @@ const fn escape_ascii<const N: usize>(byte: u8) -> ([ascii::Char; N], Range<u8>) Some(ascii::Char::LineFeed) => backslash(ascii::Char::SmallN), Some(ascii::Char::CarriageReturn) => backslash(ascii::Char::SmallR), Some(ascii::Char::CharacterTabulation) => backslash(ascii::Char::SmallT), - Some( - c @ ascii::Char::QuotationMark - | c @ ascii::Char::Apostrophe - | c @ ascii::Char::ReverseSolidus, - ) => backslash(c), _ => { let hi = HEX_DIGITS[(byte >> 4) as usize]; let lo = HEX_DIGITS[(byte & 0xf) as usize]; From b161093b2890562834c889a55daf9f2a75de6a9d Mon Sep 17 00:00:00 2001 From: Markus Reiter Date: Sat, 1 Jun 2024 20:49:54 +0200 Subject: [PATCH 4/5] Further optimization. --- library/core/src/escape.rs | 61 +++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/library/core/src/escape.rs b/library/core/src/escape.rs index 3c09f5fdc20b5..7dae2c8a3b24b 100644 --- a/library/core/src/escape.rs +++ b/library/core/src/escape.rs @@ -24,35 +24,40 @@ const fn backslash<const N: usize>(a: ascii::Char) -> ([ascii::Char; N], Range<u const fn escape_ascii<const N: usize>(byte: u8) -> ([ascii::Char; N], Range<u8>) { const { assert!(N >= 4) }; - let mut output = [ascii::Char::Null; N]; - - // NOTE: This `match` is roughly ordered by the frequency of ASCII - // characters for performance. 
- match byte.as_ascii() { - Some( - c @ ascii::Char::QuotationMark - | c @ ascii::Char::Apostrophe - | c @ ascii::Char::ReverseSolidus, - ) => backslash(c), - Some(c) if !byte.is_ascii_control() => { - output[0] = c; - (output, 0..1) - } - Some(ascii::Char::LineFeed) => backslash(ascii::Char::SmallN), - Some(ascii::Char::CarriageReturn) => backslash(ascii::Char::SmallR), - Some(ascii::Char::CharacterTabulation) => backslash(ascii::Char::SmallT), - _ => { - let hi = HEX_DIGITS[(byte >> 4) as usize]; - let lo = HEX_DIGITS[(byte & 0xf) as usize]; - - output[0] = ascii::Char::ReverseSolidus; - output[1] = ascii::Char::SmallX; - output[2] = hi; - output[3] = lo; - - (output, 0..4) + let mut output = [ascii::Char::ReverseSolidus; N]; + output[1] = ascii::Char::SmallX; + output[2] = HEX_DIGITS[(byte >> 4) as usize]; + output[3] = HEX_DIGITS[(byte & 0b1111) as usize]; + + let len = if byte < 127 { + match byte { + c @ b'\"' | c @ b'\'' | c @ b'\\' => { + output[1] = c.as_ascii().unwrap(); + 2 + } + c @ 0x20..0x7f => { + output[0] = c.as_ascii().unwrap(); + 1 + } + b'\n' => { + output[1] = ascii::Char::SmallN; + 2 + } + b'\r' => { + output[1] = ascii::Char::SmallR; + 2 + } + b'\t' => { + output[1] = ascii::Char::SmallT; + 2 + } + _ => 4, } - } + } else { + 4 + }; + + (output, 0..len) } /// Escapes a character `\u{NNNN}` representation. From 798c6525215e2adb88a4efacddaa7f49f8e35cba Mon Sep 17 00:00:00 2001 From: Markus Reiter Date: Sat, 1 Jun 2024 20:59:33 +0200 Subject: [PATCH 5/5] Update escape.rs --- library/core/src/escape.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/core/src/escape.rs b/library/core/src/escape.rs index 7dae2c8a3b24b..78676a5c142e1 100644 --- a/library/core/src/escape.rs +++ b/library/core/src/escape.rs @@ -35,7 +35,7 @@ const fn escape_ascii<const N: usize>(byte: u8) -> ([ascii::Char; N], Range<u8>) output[1] = c.as_ascii().unwrap(); 2 } - c @ 0x20..0x7f => { + c @ 0x20..=0x7e => { output[0] = c.as_ascii().unwrap(); 1 }