@@ -47,7 +47,7 @@ function length(s::String)
47
47
cnum
48
48
end
49
49
50
- function next (s :: String , i:: Int )
50
+ @noinline function slow_utf8_next (d :: Vector{UInt8} , b :: UInt8 , i:: Int )
51
51
# potentially faster version
52
52
# d = s.data
53
53
# a::UInt32 = d[i]
@@ -59,31 +59,42 @@ function next(s::String, i::Int)
59
59
# if a < 0xf0; return Char(c - 0x000e2080); end
60
60
# return Char(c<<6 + d[i+3] - 0x03c82080)
61
61
62
- d = s. data
63
- b = d[i]
64
62
if is_valid_continuation (b)
65
63
throw (UnicodeError (UTF_ERR_INVALID_INDEX, i, d[i]))
66
64
end
67
- trailing = utf8_trailing[b+ 1 ]
65
+ trailing = utf8_trailing[b + 1 ]
68
66
if length (d) < i + trailing
69
67
return ' \u fffd' , i+ 1
70
68
end
71
69
c:: UInt32 = 0
72
- for j = 1 : trailing+ 1
70
+ for j = 1 : ( trailing + 1 )
73
71
c <<= 6
74
72
c += d[i]
75
73
i += 1
76
74
end
77
- c -= utf8_offset[trailing+ 1 ]
78
- Char (c), i
75
+ c -= utf8_offset[trailing + 1 ]
76
+ return Char (c), i
77
+ end
78
+
79
# Decode the character starting at byte index `i` of `s` and return it
# together with the index that follows it, as a `(Char, Int)` pair.
#
# Only the single-byte case (lead byte below 0x80) is decoded here, keeping
# this inlined method tiny for ASCII-heavy callers such as number parsing.
# Any other lead byte is forwarded, along with the byte vector and the
# index, to `slow_utf8_next`.
@inline function next(s::String, i::Int)
    bytes = s.data
    lead = bytes[i]
    # anything that is not plain ASCII goes to the general decoder
    lead < 0x80 || return slow_utf8_next(bytes, lead, i)
    return Char(lead), i + 1
end
80
90
81
91
# Return, as a `UInt8`, the first (lead) byte of the UTF-8 encoding of `ch`.
#
# The code point's magnitude selects the sequence length; the lead byte is
# the length marker (0xc0, 0xe0 or 0xf0) or'ed with the code point's
# highest payload bits. Code points below 0x80 are their own single byte.
function first_utf8_byte(ch::Char)
    cp = UInt32(ch)
    if cp < 0x80
        # one-byte sequence: the code point is the byte
        return cp % UInt8
    elseif cp < 0x800
        # two-byte sequence: marker 110xxxxx
        return ((cp >> 6) | 0xc0) % UInt8
    elseif cp < 0x10000
        # three-byte sequence: marker 1110xxxx
        return ((cp >> 12) | 0xe0) % UInt8
    end
    # four-byte sequence: marker 11110xxx
    return ((cp >> 18) | 0xf0) % UInt8
end
88
99
89
100
function reverseind (s:: String , i:: Integer )
0 commit comments