Skip to content

Commit 9a2c144

Browse files
authored
Add a fast path to `next` for the String type (#16914)
Ref: the parse_int test in #16128
1 parent a413a0a commit 9a2c144

File tree

1 file changed

+22
-11
lines changed

1 file changed

+22
-11
lines changed

base/strings/string.jl

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ function length(s::String)
4747
cnum
4848
end
4949

50-
function next(s::String, i::Int)
50+
@noinline function slow_utf8_next(d::Vector{UInt8}, b::UInt8, i::Int)
5151
# potentially faster version
5252
# d = s.data
5353
# a::UInt32 = d[i]
@@ -59,31 +59,42 @@ function next(s::String, i::Int)
5959
# if a < 0xf0; return Char(c - 0x000e2080); end
6060
# return Char(c<<6 + d[i+3] - 0x03c82080)
6161

62-
d = s.data
63-
b = d[i]
6462
if is_valid_continuation(b)
6563
throw(UnicodeError(UTF_ERR_INVALID_INDEX, i, d[i]))
6664
end
67-
trailing = utf8_trailing[b+1]
65+
trailing = utf8_trailing[b + 1]
6866
if length(d) < i + trailing
6967
return '\ufffd', i+1
7068
end
7169
c::UInt32 = 0
72-
for j = 1:trailing+1
70+
for j = 1:(trailing + 1)
7371
c <<= 6
7472
c += d[i]
7573
i += 1
7674
end
77-
c -= utf8_offset[trailing+1]
78-
Char(c), i
75+
c -= utf8_offset[trailing + 1]
76+
return Char(c), i
77+
end
78+
79+
@inline function next(s::String, i::Int)
80+
# function is split into this critical fast-path
81+
# for pure ascii data, such as parsing numbers,
82+
# and a longer function that can handle any utf8 data
83+
d = s.data
84+
b = d[i]
85+
if b < 0x80
86+
return Char(b), i + 1
87+
end
88+
return slow_utf8_next(d, b, i)
7989
end
8090

8191
function first_utf8_byte(ch::Char)
8292
c = UInt32(ch)
83-
c < 0x80 ? c%UInt8 :
84-
c < 0x800 ? ((c>>6) | 0xc0)%UInt8 :
85-
c < 0x10000 ? ((c>>12) | 0xe0)%UInt8 :
86-
((c>>18) | 0xf0)%UInt8
93+
b = c < 0x80 ? c%UInt8 :
94+
c < 0x800 ? ((c>>6) | 0xc0)%UInt8 :
95+
c < 0x10000 ? ((c>>12) | 0xe0)%UInt8 :
96+
((c>>18) | 0xf0)%UInt8
97+
return b
8798
end
8899

89100
function reverseind(s::String, i::Integer)

0 commit comments

Comments
 (0)