@@ -29,7 +29,7 @@ mod tests;
29
29
use self :: LiteralKind :: * ;
30
30
use self :: TokenKind :: * ;
31
31
use crate :: cursor:: { Cursor , EOF_CHAR } ;
32
- use std:: convert:: TryInto ;
32
+ use std:: convert:: TryFrom ;
33
33
34
34
/// Parsed token.
35
35
/// It doesn't contain information about data that has been parsed,
@@ -142,84 +142,24 @@ pub enum LiteralKind {
142
142
/// "b"abc"", "b"abc"
143
143
ByteStr { terminated : bool } ,
144
144
/// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a"
145
- RawStr ( UnvalidatedRawStr ) ,
145
+ RawStr { n_hashes : u16 , err : Option < RawStrError > } ,
146
146
/// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a"
147
- RawByteStr ( UnvalidatedRawStr ) ,
148
- }
149
-
150
- /// Represents something that looks like a raw string, but may have some
151
- /// problems. Use `.validate()` to convert it into something
152
- /// usable.
153
- #[ derive( Clone , Copy , Debug , PartialEq , Eq , PartialOrd , Ord ) ]
154
- pub struct UnvalidatedRawStr {
155
- /// The prefix (`r###"`) is valid
156
- valid_start : bool ,
157
-
158
- /// The postfix (`"###`) is valid
159
- valid_end : bool ,
160
-
161
- /// The number of leading `#`
162
- n_start_hashes : usize ,
163
- /// The number of trailing `#`. `n_end_hashes` <= `n_start_hashes`
164
- n_end_hashes : usize ,
165
- /// The offset starting at `r` or `br` where the user may have intended to end the string.
166
- /// Currently, it is the longest sequence of pattern `"#+"`.
167
- possible_terminator_offset : Option < usize > ,
147
+ RawByteStr { n_hashes : u16 , err : Option < RawStrError > } ,
168
148
}
169
149
170
150
/// Error produced validating a raw string. Represents cases like:
171
- /// - `r##~"abcde"##`: `LexRawStrError:: InvalidStarter`
172
- /// - `r###"abcde"##`: `LexRawStrError:: NoTerminator { expected: 3, found: 2, possible_terminator_offset: Some(11)`
173
- /// - Too many `#`s (>65536 ): `TooManyDelimiters`
151
+ /// - `r##~"abcde"##`: `InvalidStarter`
152
+ /// - `r###"abcde"##`: `NoTerminator { expected: 3, found: 2, possible_terminator_offset: Some(11) }`
153
+ /// - Too many `#`s (>65535 ): `TooManyDelimiters`
174
154
#[ derive( Clone , Copy , Debug , PartialEq , Eq , PartialOrd , Ord ) ]
175
- pub enum LexRawStrError {
155
+ pub enum RawStrError {
176
156
/// Non-`#` characters exist between `r` and `"`, e.g. `r#~"..`
177
- InvalidStarter ,
157
+ InvalidStarter { bad_char : char } ,
178
158
/// The string was never terminated. `possible_terminator_offset` is the number of characters after `r` or `br` where the user
179
159
/// may have intended to terminate it.
180
160
NoTerminator { expected : usize , found : usize , possible_terminator_offset : Option < usize > } ,
181
- /// More than 65536 `#`s exist.
182
- TooManyDelimiters ,
183
- }
184
-
185
- /// Raw String that contains a valid prefix (`#+"`) and postfix (`"#+`) where
186
- /// there are a matching number of `#` characters in both. Note that this will
187
- /// not consume extra trailing `#` characters: `r###"abcde"####` is lexed as a
188
- /// `ValidatedRawString { n_hashes: 3 }` followed by a `#` token.
189
- #[ derive( Debug , Eq , PartialEq , Copy , Clone ) ]
190
- pub struct ValidatedRawStr {
191
- n_hashes : u16 ,
192
- }
193
-
194
- impl ValidatedRawStr {
195
- pub fn num_hashes ( & self ) -> u16 {
196
- self . n_hashes
197
- }
198
- }
199
-
200
- impl UnvalidatedRawStr {
201
- pub fn validate ( self ) -> Result < ValidatedRawStr , LexRawStrError > {
202
- if !self . valid_start {
203
- return Err ( LexRawStrError :: InvalidStarter ) ;
204
- }
205
-
206
- // Only up to 65535 `#`s are allowed in raw strings
207
- let n_start_safe: u16 =
208
- self . n_start_hashes . try_into ( ) . map_err ( |_| LexRawStrError :: TooManyDelimiters ) ?;
209
-
210
- if self . n_start_hashes > self . n_end_hashes || !self . valid_end {
211
- Err ( LexRawStrError :: NoTerminator {
212
- expected : self . n_start_hashes ,
213
- found : self . n_end_hashes ,
214
- possible_terminator_offset : self . possible_terminator_offset ,
215
- } )
216
- } else {
217
- // Since the lexer should never produce a literal with n_end > n_start, if n_start <= n_end,
218
- // they must be equal.
219
- debug_assert_eq ! ( self . n_start_hashes, self . n_end_hashes) ;
220
- Ok ( ValidatedRawStr { n_hashes : n_start_safe } )
221
- }
222
- }
161
+ /// More than 65535 `#`s exist.
162
+ TooManyDelimiters { found : usize } ,
223
163
}
224
164
225
165
/// Base of numeric literal encoding according to its prefix.
@@ -354,12 +294,12 @@ impl Cursor<'_> {
354
294
'r' => match ( self . first ( ) , self . second ( ) ) {
355
295
( '#' , c1) if is_id_start ( c1) => self . raw_ident ( ) ,
356
296
( '#' , _) | ( '"' , _) => {
357
- let raw_str_i = self . raw_double_quoted_string ( 1 ) ;
297
+ let ( n_hashes , err ) = self . raw_double_quoted_string ( 1 ) ;
358
298
let suffix_start = self . len_consumed ( ) ;
359
- if raw_str_i . n_end_hashes == raw_str_i . n_start_hashes {
299
+ if err . is_none ( ) {
360
300
self . eat_literal_suffix ( ) ;
361
301
}
362
- let kind = RawStr ( raw_str_i ) ;
302
+ let kind = RawStr { n_hashes , err } ;
363
303
Literal { kind, suffix_start }
364
304
}
365
305
_ => self . ident ( ) ,
@@ -389,14 +329,12 @@ impl Cursor<'_> {
389
329
}
390
330
( 'r' , '"' ) | ( 'r' , '#' ) => {
391
331
self . bump ( ) ;
392
- let raw_str_i = self . raw_double_quoted_string ( 2 ) ;
332
+ let ( n_hashes , err ) = self . raw_double_quoted_string ( 2 ) ;
393
333
let suffix_start = self . len_consumed ( ) ;
394
- let terminated = raw_str_i. n_start_hashes == raw_str_i. n_end_hashes ;
395
- if terminated {
334
+ if err. is_none ( ) {
396
335
self . eat_literal_suffix ( ) ;
397
336
}
398
-
399
- let kind = RawByteStr ( raw_str_i) ;
337
+ let kind = RawByteStr { n_hashes, err } ;
400
338
Literal { kind, suffix_start }
401
339
}
402
340
_ => self . ident ( ) ,
@@ -692,27 +630,34 @@ impl Cursor<'_> {
692
630
false
693
631
}
694
632
695
- /// Eats the double-quoted string and returns an `UnvalidatedRawStr`.
696
- fn raw_double_quoted_string ( & mut self , prefix_len : usize ) -> UnvalidatedRawStr {
633
+ /// Eats a raw double-quoted string and returns its `#` count (`n_hashes`) together with an error, if one was encountered.
634
+ fn raw_double_quoted_string ( & mut self , prefix_len : usize ) -> ( u16 , Option < RawStrError > ) {
635
+ // Delegate the lexing to `raw_string_unvalidated` and check the hash count only afterwards.
636
+ // This way, the whole raw string is eaten even when it has too many hashes.
637
+ let ( n_hashes, err) = self . raw_string_unvalidated ( prefix_len) ;
638
+ // Only up to 65535 `#`s are allowed in raw strings, i.e. the count must fit in a `u16`.
639
+ match u16:: try_from ( n_hashes) {
640
+ Ok ( num) => ( num, err) ,
641
+ // The count does not fit in `u16`: report 0 hashes, keep the real count in `found`, and replace any earlier error.
642
+ Err ( _) => ( 0 , Some ( RawStrError :: TooManyDelimiters { found : n_hashes } ) ) ,
643
+ }
644
+ }
645
+
646
+ fn raw_string_unvalidated ( & mut self , prefix_len : usize ) -> ( usize , Option < RawStrError > ) {
697
647
debug_assert ! ( self . prev( ) == 'r' ) ;
698
- let mut valid_start: bool = false ;
699
648
let start_pos = self . len_consumed ( ) ;
700
- let ( mut possible_terminator_offset, mut max_hashes) = ( None , 0 ) ;
649
+ let mut possible_terminator_offset = None ;
650
+ let mut max_hashes = 0 ;
701
651
702
652
// Count opening '#' symbols.
703
653
let n_start_hashes = self . eat_while ( |c| c == '#' ) ;
704
654
705
655
// Check that string is started.
706
656
match self . bump ( ) {
707
- Some ( '"' ) => valid_start = true ,
708
- _ => {
709
- return UnvalidatedRawStr {
710
- valid_start,
711
- valid_end : false ,
712
- n_start_hashes,
713
- n_end_hashes : 0 ,
714
- possible_terminator_offset,
715
- } ;
657
+ Some ( '"' ) => ( ) ,
658
+ c => {
659
+ let c = c. unwrap_or ( EOF_CHAR ) ;
660
+ return ( n_start_hashes, Some ( RawStrError :: InvalidStarter { bad_char : c } ) ) ;
716
661
}
717
662
}
718
663
@@ -722,13 +667,14 @@ impl Cursor<'_> {
722
667
self . eat_while ( |c| c != '"' ) ;
723
668
724
669
if self . is_eof ( ) {
725
- return UnvalidatedRawStr {
726
- valid_start,
727
- valid_end : false ,
670
+ return (
728
671
n_start_hashes,
729
- n_end_hashes : max_hashes,
730
- possible_terminator_offset,
731
- } ;
672
+ Some ( RawStrError :: NoTerminator {
673
+ expected : n_start_hashes,
674
+ found : max_hashes,
675
+ possible_terminator_offset,
676
+ } ) ,
677
+ ) ;
732
678
}
733
679
734
680
// Eat closing double quote.
@@ -737,7 +683,7 @@ impl Cursor<'_> {
737
683
// Check that amount of closing '#' symbols
738
684
// is equal to the amount of opening ones.
739
685
// Note that this will not consume extra trailing `#` characters:
740
- // `r###"abcde"####` is lexed as a `LexedRawString { n_hashes: 3 }`
686
+ // `r###"abcde"####` is lexed as a `RawStr { n_hashes: 3 }`
741
687
// followed by a `#` token.
742
688
let mut hashes_left = n_start_hashes;
743
689
let is_closing_hash = |c| {
@@ -751,13 +697,7 @@ impl Cursor<'_> {
751
697
let n_end_hashes = self . eat_while ( is_closing_hash) ;
752
698
753
699
if n_end_hashes == n_start_hashes {
754
- return UnvalidatedRawStr {
755
- valid_start,
756
- valid_end : true ,
757
- n_start_hashes,
758
- n_end_hashes,
759
- possible_terminator_offset : None ,
760
- } ;
700
+ return ( n_start_hashes, None ) ;
761
701
} else if n_end_hashes > max_hashes {
762
702
// Keep track of possible terminators to give a hint about
763
703
// where there might be a missing terminator
0 commit comments