@@ -32,8 +32,7 @@ impl Iterator<Node> for Tokenizer {
32
32
33
33
#[inline]
fn preprocess(input: &str) -> String {
    // Newline normalization (CR, CRLF and FF -> LF) is intentionally NOT done
    // here any more: the tokenizer now matches '\r' and '\x0C' directly at
    // every point where a newline is significant, so this is a plain copy
    // of the input into an owned String.
    input.to_string()
}
38
37
39
38
@@ -43,6 +42,12 @@ pub struct Tokenizer {
43
42
position : uint , // All counted in bytes, not characters
44
43
}
45
44
45
+ macro_rules! is_match(
46
+ ( $value: expr, $( $pattern: pat) |+) => (
47
+ match $value { $( $pattern) |+ => true , _ => false }
48
+ ) ;
49
+ )
50
+
46
51
47
52
impl Tokenizer {
48
53
#[ inline]
@@ -57,6 +62,12 @@ impl Tokenizer {
57
62
self . input . as_slice ( ) . char_at ( self . position + offset)
58
63
}
59
64
65
+ #[ inline]
66
+ fn has_newline_at ( & self , offset : uint ) -> bool {
67
+ self . position + offset < self . length &&
68
+ is_match ! ( self . char_at( offset) , '\n' | '\r' | '\x0C' )
69
+ }
70
+
60
71
#[ inline]
61
72
fn consume_char ( & mut self ) -> char {
62
73
let range = self . input . as_slice ( ) . char_range_at ( self . position ) ;
@@ -70,12 +81,6 @@ impl Tokenizer {
70
81
}
71
82
}
72
83
73
- macro_rules! is_match(
74
- ( $value: expr, $( $pattern: pat) |+) => (
75
- match $value { $( $pattern) |+ => true , _ => false }
76
- ) ;
77
- )
78
-
79
84
80
85
fn next_component_value ( tokenizer : & mut Tokenizer ) -> Option < Node > {
81
86
consume_comments ( tokenizer) ;
@@ -89,10 +94,10 @@ fn next_component_value(tokenizer: &mut Tokenizer) -> Option<Node> {
89
94
} ;
90
95
let c = tokenizer. current_char ( ) ;
91
96
let component_value = match c {
92
- '\t' | '\n' | ' ' => {
97
+ '\t' | '\n' | ' ' | '\r' | '\x0C' => {
93
98
while !tokenizer. is_eof ( ) {
94
99
match tokenizer. current_char ( ) {
95
- ' ' | '\t' | '\n' => tokenizer. position += 1 ,
100
+ ' ' | '\t' | '\n' | '\r' | '\x0C' => tokenizer. position += 1 ,
96
101
_ => break ,
97
102
}
98
103
}
@@ -104,7 +109,7 @@ fn next_component_value(tokenizer: &mut Tokenizer) -> Option<Node> {
104
109
if is_ident_start ( tokenizer) { IDHash ( consume_name ( tokenizer) ) }
105
110
else if !tokenizer. is_eof ( ) && match tokenizer. current_char ( ) {
106
111
'a' ...'z' | 'A' ...'Z' | '0' ...'9' | '-' | '_' => true ,
107
- '\\' => !tokenizer. starts_with ( " \\ \n " ) ,
112
+ '\\' => !tokenizer. has_newline_at ( 1 ) ,
108
113
_ => c > '\x7F' , // Non-ASCII
109
114
} { Hash ( consume_name ( tokenizer) ) }
110
115
else { Delim ( c) }
@@ -193,7 +198,7 @@ fn next_component_value(tokenizer: &mut Tokenizer) -> Option<Node> {
193
198
'a' ...'z' | 'A' ...'Z' | '_' | '\0' => consume_ident_like ( tokenizer) ,
194
199
'[' => SquareBracketBlock ( consume_block ( tokenizer, CloseSquareBracket ) ) ,
195
200
'\\' => {
196
- if !tokenizer. starts_with ( " \\ \n " ) { consume_ident_like ( tokenizer) }
201
+ if !tokenizer. has_newline_at ( 1 ) { consume_ident_like ( tokenizer) }
197
202
else { tokenizer. position += 1 ; Delim ( c) }
198
203
} ,
199
204
']' => { tokenizer. position += 1 ; CloseSquareBracket } ,
@@ -289,16 +294,23 @@ fn consume_quoted_string(tokenizer: &mut Tokenizer, single_quote: bool) -> Resul
289
294
match tokenizer. consume_char ( ) {
290
295
'"' if !single_quote => break ,
291
296
'\'' if single_quote => break ,
292
- '\n' => {
297
+ '\n' | '\r' | '\x0C' => {
293
298
tokenizer. position -= 1 ;
294
299
return Err ( ( ) ) ;
295
300
} ,
296
301
'\\' => {
297
302
if !tokenizer. is_eof ( ) {
298
- if tokenizer. current_char ( ) == '\n' { // Escaped newline
299
- tokenizer. position += 1 ;
303
+ match tokenizer. current_char ( ) {
304
+ // Escaped newline
305
+ '\n' | '\x0C' => tokenizer. position += 1 ,
306
+ '\r' => {
307
+ tokenizer. position += 1 ;
308
+ if !tokenizer. is_eof ( ) && tokenizer. current_char ( ) == '\n' {
309
+ tokenizer. position += 1 ;
310
+ }
311
+ }
312
+ _ => string. push ( consume_escape ( tokenizer) )
300
313
}
301
- else { string. push ( consume_escape ( tokenizer) ) }
302
314
}
303
315
// else: escaped EOF, do nothing.
304
316
}
@@ -316,10 +328,10 @@ fn is_ident_start(tokenizer: &mut Tokenizer) -> bool {
316
328
'a' ...'z' | 'A' ...'Z' | '_' | '\0' => true ,
317
329
'-' => tokenizer. position + 1 < tokenizer. length && match tokenizer. char_at ( 1 ) {
318
330
'a' ...'z' | 'A' ...'Z' | '_' | '\0' => true ,
319
- '\\' => !tokenizer. input . as_slice ( ) . slice_from ( tokenizer . position + 1 ) . starts_with ( " \\ \n " ) ,
331
+ '\\' => !tokenizer. has_newline_at ( 1 ) ,
320
332
c => c > '\x7F' , // Non-ASCII
321
333
} ,
322
- '\\' => !tokenizer. starts_with ( " \\ \n " ) ,
334
+ '\\' => !tokenizer. has_newline_at ( 1 ) ,
323
335
c => c > '\x7F' , // Non-ASCII
324
336
}
325
337
}
@@ -342,7 +354,7 @@ fn consume_name(tokenizer: &mut Tokenizer) -> String {
342
354
value. push ( match c {
343
355
'a' ...'z' | 'A' ...'Z' | '0' ...'9' | '_' | '-' => { tokenizer. position += 1 ; c } ,
344
356
'\\' => {
345
- if tokenizer. starts_with ( " \\ \n " ) { break }
357
+ if tokenizer. has_newline_at ( 1 ) { break }
346
358
tokenizer. position += 1 ;
347
359
consume_escape ( tokenizer)
348
360
} ,
@@ -435,7 +447,7 @@ fn consume_url(tokenizer: &mut Tokenizer) -> ComponentValue {
435
447
tokenizer. position += 1 ; // Skip the ( of url(
436
448
while !tokenizer. is_eof ( ) {
437
449
match tokenizer. current_char ( ) {
438
- ' ' | '\t' | '\n' => tokenizer. position += 1 ,
450
+ ' ' | '\t' | '\n' | '\r' | '\x0C' => tokenizer. position += 1 ,
439
451
'"' => return consume_quoted_url ( tokenizer, false ) ,
440
452
'\'' => return consume_quoted_url ( tokenizer, true ) ,
441
453
')' => { tokenizer. position += 1 ; break } ,
@@ -455,12 +467,12 @@ fn consume_url(tokenizer: &mut Tokenizer) -> ComponentValue {
455
467
let mut string = String :: new ( ) ;
456
468
while !tokenizer. is_eof ( ) {
457
469
let next_char = match tokenizer. consume_char ( ) {
458
- ' ' | '\t' | '\n' => return consume_url_end ( tokenizer, string) ,
470
+ ' ' | '\t' | '\n' | '\r' | '\x0C' => return consume_url_end ( tokenizer, string) ,
459
471
')' => break ,
460
472
'\x01' ...'\x08' | '\x0B' | '\x0E' ...'\x1F' | '\x7F' // non-printable
461
473
| '"' | '\'' | '(' => return consume_bad_url ( tokenizer) ,
462
474
'\\' => {
463
- if ! tokenizer. is_eof ( ) && tokenizer . current_char ( ) == '\n' {
475
+ if tokenizer. has_newline_at ( 0 ) {
464
476
return consume_bad_url ( tokenizer)
465
477
}
466
478
consume_escape ( tokenizer)
@@ -476,7 +488,7 @@ fn consume_url(tokenizer: &mut Tokenizer) -> ComponentValue {
476
488
fn consume_url_end ( tokenizer : & mut Tokenizer , string : String ) -> ComponentValue {
477
489
while !tokenizer. is_eof ( ) {
478
490
match tokenizer. consume_char ( ) {
479
- ' ' | '\t' | '\n' => ( ) ,
491
+ ' ' | '\t' | '\n' | '\r' | '\x0C' => ( ) ,
480
492
')' => break ,
481
493
_ => return consume_bad_url ( tokenizer)
482
494
}
@@ -557,7 +569,13 @@ fn consume_escape(tokenizer: &mut Tokenizer) -> char {
557
569
}
558
570
if !tokenizer. is_eof ( ) {
559
571
match tokenizer. current_char ( ) {
560
- ' ' | '\t' | '\n' => tokenizer. position += 1 ,
572
+ ' ' | '\t' | '\n' | '\x0C' => tokenizer. position += 1 ,
573
+ '\r' => {
574
+ tokenizer. position += 1 ;
575
+ if !tokenizer. is_eof ( ) && tokenizer. current_char ( ) == '\n' {
576
+ tokenizer. position += 1 ;
577
+ }
578
+ }
561
579
_ => ( )
562
580
}
563
581
}
0 commit comments