From 7781530b0d7d6e8fa832204f45a56bd7e620a00e Mon Sep 17 00:00:00 2001
From: Andrew Vit
Date: Sat, 14 Nov 2020 00:10:51 -0800
Subject: [PATCH] Limit codepoint caching to max 3-byte sequences

This keeps the benefit of avoiding sprintf on common codepoints without
ballooning the lookup table too much.
---
 lib/rack/utf8_sanitizer.rb | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/rack/utf8_sanitizer.rb b/lib/rack/utf8_sanitizer.rb
index a854590..f8166cf 100644
--- a/lib/rack/utf8_sanitizer.rb
+++ b/lib/rack/utf8_sanitizer.rb
@@ -250,9 +250,11 @@ def unescape_unreserved(input)
     # optimized from URI::RFC2396_Parser#escape
     def escape_unreserved(input)
       @unsafe_map ||= Hash.new do |table, us|
-        table[us] = us.each_byte.reduce('') do |tmp, uc|
+        encoded = us.each_byte.reduce('') do |tmp, uc|
           tmp << sprintf('%%%02X', uc)
         end
+        table[us] = encoded if us.bytesize <= 3
+        encoded
       end
       input.gsub(UNSAFE, @unsafe_map).force_encoding(Encoding::US_ASCII)
     end