1
1
//! Manage xml character escapes
2
2
3
- use memchr;
4
3
use std:: borrow:: Cow ;
5
4
use std:: collections:: HashMap ;
6
5
use std:: ops:: Range ;
7
6
7
+ use jetscii:: bytes;
8
+ use memchr;
9
+ use once_cell:: sync:: Lazy ;
10
+
8
11
#[ cfg( test) ]
9
12
use pretty_assertions:: assert_eq;
10
13
14
/// Matcher for the five bytes rewritten by `escape`: <, >, &, single quote and double quote.
/// Wrapped in `Lazy`, so the matcher is constructed on first access rather than at load time.
+ static XML_ESCAPE_BYTES : Lazy < jetscii:: BytesConst > =
15
+ Lazy :: new ( || bytes ! ( b'<' , b'>' , b'&' , b'\'' , b'"' ) ) ;
16
/// Matcher for the three bytes rewritten by `partial_escape`: <, > and &. Quote bytes are not matched.
+ static XML_PARTIAL_ESCAPE_BYTES : Lazy < jetscii:: BytesConst > = Lazy :: new ( || bytes ! ( b'<' , b'>' , b'&' ) ) ;
17
+
11
18
/// Error for XML escape/unescape.
12
19
#[ derive( Debug ) ]
13
20
pub enum EscapeError {
@@ -66,31 +73,17 @@ impl std::error::Error for EscapeError {}
66
73
/// Escapes a `&[u8]` and replaces all xml special characters (<, >, &, ', ") with their
67
74
/// corresponding xml escaped value.
68
75
pub fn escape ( raw : & [ u8 ] ) -> Cow < [ u8 ] > {
69
- #[ inline]
70
- fn to_escape ( b : u8 ) -> bool {
71
- match b {
72
- b'<' | b'>' | b'\'' | b'&' | b'"' => true ,
73
- _ => false ,
74
- }
75
- }
76
-
77
- _escape ( raw, to_escape)
76
+ // Delegates to `simd_escape` with the shared lazily-built matcher for <, >, &, single and double quotes; this replaces the earlier `_escape` call that took a per-byte predicate.
77
+ simd_escape ( raw, & XML_ESCAPE_BYTES )
78
78
}
79
79
80
80
/// Should only be used for escaping text content. In xml text content, it is allowed
81
81
/// (though not recommended) to leave the quote special characters " and ' unescaped.
82
82
/// This function escapes a `&[u8]` and replaces xml special characters (<, >, &) with
83
83
/// their corresponding xml escaped value, but does not escape quote characters.
84
84
pub fn partial_escape ( raw : & [ u8 ] ) -> Cow < [ u8 ] > {
85
- #[ inline]
86
- fn to_escape ( b : u8 ) -> bool {
87
- match b {
88
- b'<' | b'>' | b'&' => true ,
89
- _ => false ,
90
- }
91
- }
92
-
93
- _escape ( raw, to_escape)
85
+ // Delegates to `simd_escape` with the shared lazily-built matcher for <, > and & only, so quote bytes pass through untouched; this replaces the earlier `_escape` call that took a per-byte predicate.
86
+ simd_escape ( raw, & XML_PARTIAL_ESCAPE_BYTES )
94
87
}
95
88
96
89
/// Escapes a `&[u8]` and replaces a subset of xml special characters (<, >, &, ', ") with their
@@ -112,7 +105,46 @@ fn _escape<F: Fn(u8) -> bool>(raw: &[u8], escape_chars: F) -> Cow<[u8]> {
112
105
b'\'' => escaped. extend_from_slice ( b"'" ) ,
113
106
b'&' => escaped. extend_from_slice ( b"&" ) ,
114
107
b'"' => escaped. extend_from_slice ( b""" ) ,
115
- _ => unreachable ! ( "Only '<', '>','\' , '&' and '\" ' are escaped" ) ,
108
+ c @ _ => unreachable ! (
109
+ "Found {} but only '<', '>', ', '&' and '\" ' are escaped" ,
110
+ c as char
111
+ ) ,
112
+ }
113
+ pos = new_pos + 1 ;
114
+ }
115
+
116
+ if let Some ( mut escaped) = escaped {
117
+ if let Some ( raw) = raw. get ( pos..) {
118
+ escaped. extend_from_slice ( raw) ;
119
+ }
120
+ Cow :: Owned ( escaped)
121
+ } else {
122
+ Cow :: Borrowed ( raw)
123
+ }
124
+ }
125
+
126
+ /// Escapes a `&[u8]` and replaces the xml special characters found by `escape_matcher` (a subset of <, >, &, ', ") with their
127
+ /// corresponding xml escaped value.
128
+ pub fn simd_escape < ' a > ( raw : & ' a [ u8 ] , escape_matcher : & jetscii:: BytesConst ) -> Cow < ' a , [ u8 ] > {
129
+ let mut escaped = None ;
130
+ let mut pos = 0 ;
131
+ while let Some ( i) = escape_matcher. find ( & raw [ pos..] ) {
132
+ if escaped. is_none ( ) {
133
+ escaped = Some ( Vec :: with_capacity ( raw. len ( ) ) ) ;
134
+ }
135
+ let escaped = escaped. as_mut ( ) . expect ( "initialized" ) ;
136
+ let new_pos = pos + i;
137
+ escaped. extend_from_slice ( & raw [ pos..new_pos] ) ;
138
+ match raw[ new_pos] {
139
+ b'<' => escaped. extend_from_slice ( b"<" ) ,
140
+ b'>' => escaped. extend_from_slice ( b">" ) ,
141
+ b'\'' => escaped. extend_from_slice ( b"'" ) ,
142
+ b'&' => escaped. extend_from_slice ( b"&" ) ,
143
+ b'"' => escaped. extend_from_slice ( b""" ) ,
144
+ c @ _ => unreachable ! (
145
+ "Found {} but only '<', '>', ', '&' and '\" ' are escaped" ,
146
+ c as char
147
+ ) ,
116
148
}
117
149
pos = new_pos + 1 ;
118
150
}
0 commit comments