Skip to content

Commit ffb4ec4

Browse files
committed
Merge pull request JuliaLang#11925 from ScottPJones/spj/string
Reorganize base/string.jl, base/utf*, test/strings.jl, test/unicode.jl
2 parents d73886e + 38c6925 commit ffb4ec4

35 files changed

+3919
-3956
lines changed

base/hashing2.jl

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,3 +166,15 @@ end
166166
## hashing Float16s ##
167167

168168
# Hash a Float16 by widening it to Float64 and hashing that value.
function hash(x::Float16, h::UInt)
    widened = Float64(x)
    return hash(widened, h)
end
169+
170+
## hashing strings ##
171+
172+
# Symbol of the C hashing routine, selected by the width of `UInt`.
const memhash = UInt === UInt64 ? :memhash_seed : :memhash32_seed
# Constant mixed into the incoming hash value before the bytes are hashed.
const memhash_seed = UInt === UInt64 ? 0x71e729fd56419c81 : 0x56419c81

# Hash the bytes of a byte string (or a substring of one), combined with `h`.
function hash{T<:ByteString}(s::Union{T,SubString{T}}, h::UInt)
    seeded = h + memhash_seed
    # note: use pointer(s) here (see #6058).
    digest = ccall(memhash, UInt, (Ptr{UInt8}, Csize_t, UInt32),
                   pointer(s), sizeof(s), seeded % UInt32)
    return digest + seeded
end

# Any other string is converted with `bytestring` first and hashed as bytes.
function hash(s::AbstractString, h::UInt)
    return hash(bytestring(s), h)
end

base/parse.jl

Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
# This file is a part of Julia. License is MIT: http://julialang.org/license
2+
3+
## string to integer functions ##
4+
5+
# Convert a single digit character `c` to its value in `base`, returned as a `T`.
#
# Digits are '0'-'9', then 'A'-'Z' (10-35). Lowercase letters carry the same
# values as uppercase for bases up to 36, and the values 36-61 for larger bases.
# Throws an `ArgumentError` for a base outside 2:62, for a character that is not
# a digit at all, or for a digit that is not valid in `base`.
function parse{T<:Integer}(::Type{T}, c::Char, base::Integer=36)
    lower_offset::Int = (base <= 36 ? 10 : 36)
    2 <= base <= 62 || throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
    if '0' <= c <= '9'
        d = c - '0'
    elseif 'A' <= c <= 'Z'
        d = c - 'A' + 10
    elseif 'a' <= c <= 'z'
        d = c - 'a' + lower_offset
    else
        throw(ArgumentError("invalid digit: $(repr(c))"))
    end
    d < base || throw(ArgumentError("invalid base $base digit $(repr(c))"))
    return convert(T, d)
end
14+
15+
# Read the character at `startpos` if it lies inside `1:endpos`.
#
# Returns `(char, index_after_char, index_of_char)`, or `(Char(0), 0, 0)` when
# `startpos` is outside the range.
function parseint_next(s::AbstractString, startpos::Int, endpos::Int)
    if !(0 < startpos <= endpos)
        return Char(0), 0, 0
    end
    ch, following = next(s, startpos)
    return ch, following, startpos
end
21+
22+
# Consume everything that may precede the digits of an integer in
# `s[startpos:endpos]`: leading whitespace, an optional sign (only when `signed`
# is true), whitespace after the sign and, when `base == 0`, an optional
# `0b`/`0o`/`0x` prefix.
#
# Returns `(sgn, base, j)` where `sgn` is 1 or -1, `base` is the base to parse
# with (10 when `base == 0` and no prefix is present) and `j` is the index of the
# first digit character. Returns `(0, 0, 0)` when the range runs out before any
# digit candidate is seen; `j` is also 0 when a base prefix is the last thing in
# the range.
function parseint_preamble(signed::Bool, base::Int, s::AbstractString, startpos::Int, endpos::Int)
    c, i, j = parseint_next(s, startpos, endpos)

    while isspace(c)
        c, i, j = parseint_next(s,i,endpos)
    end
    (j == 0) && (return 0, 0, 0)

    sgn = 1
    if signed
        if c == '-' || c == '+'
            (c == '-') && (sgn = -1)
            c, i, j = parseint_next(s,i,endpos)
        end
    end

    while isspace(c)
        c, i, j = parseint_next(s,i,endpos)
    end
    (j == 0) && (return 0, 0, 0)

    if base == 0
        # Only peek at a prefix letter that lies inside the requested range:
        # a lone "0" at the end of the range must not look at what follows it.
        if c == '0' && i <= endpos && !done(s,i)
            c, i = next(s,i)
            base = c=='b' ? 2 : c=='o' ? 8 : c=='x' ? 16 : 10
            if base != 10
                # Skip the prefix letter; `j` now indexes the first real digit.
                c, i, j = parseint_next(s,i,endpos)
            end
        else
            base = 10
        end
    end
    return sgn, base, j
end
56+
57+
# Parse the bytes `startpos:endpos` of `sbuff` as a `Bool`.
#
# The range must be exactly "true" or "false" (compared bytewise with `memcmp`).
# Returns `Nullable(true)` / `Nullable(false)` on a match. Otherwise returns an
# empty `Nullable{Bool}`, or throws an `ArgumentError` when `raise` is true.
function tryparse_internal{S<:ByteString}(::Type{Bool}, sbuff::S, startpos::Int, endpos::Int, raise::Bool)
    len = endpos-startpos+1
    p = pointer(sbuff)+startpos-1
    (len == 4) && (0 == ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt), p, "true", 4)) && (return Nullable(true))
    (len == 5) && (0 == ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt), p, "false", 5)) && (return Nullable(false))
    # Quote the offending slice of the buffer argument in the error message.
    raise && throw(ArgumentError("invalid Bool representation: $(repr(SubString(sbuff,startpos,endpos)))"))
    Nullable{Bool}()
end
65+
66+
# Overflow-checked addition: returns an empty `Nullable{T}` when `n1 + n2` would
# leave the range of `T`, otherwise the sum wrapped in a `Nullable{T}`.
function safe_add{T<:Integer}(n1::T, n2::T)
    overflows = (n2 > 0) ? (n1 > (typemax(T) - n2)) : (n1 < (typemin(T) - n2))
    return overflows ? Nullable{T}() : Nullable{T}(n1 + n2)
end
67+
# Overflow-checked multiplication: returns an empty `Nullable{T}` when `n1 * n2`
# would leave the range of `T`, otherwise the product wrapped in a `Nullable{T}`.
function safe_mul{T<:Integer}(n1::T, n2::T)
    if n2 > 0
        overflows = (n1 > div(typemax(T), n2)) || (n1 < div(typemin(T), n2))
    elseif n2 < -1
        overflows = (n1 > div(typemin(T), n2)) || (n1 < div(typemax(T), n2))
    else
        # n2 is 0 or -1: only typemin(T) * -1 cannot be represented.
        overflows = (n2 == -1) && n1 == typemin(T)
    end
    return overflows ? Nullable{T}() : Nullable{T}(n1 * n2)
end
70+
71+
# Core integer parser for the characters of `s` between `startpos` and `endpos`.
#
# `base` is handed to `parseint_preamble`, which also handles whitespace, the sign
# and (for `base == 0`) a base prefix. `a` is the value assigned to the digit 'a';
# lowercase letters map to `c-'a'+a`.
#
# Returns `Nullable{T}(n)` on success. On failure returns an empty `Nullable{T}`,
# or, when `raise` is true, throws an `ArgumentError` (malformed input) or an
# `OverflowError` (value does not fit in `T`).
function tryparse_internal{T<:Integer}(::Type{T}, s::AbstractString, startpos::Int, endpos::Int, base::Int, a::Int, raise::Bool)
72+
_n = Nullable{T}()
73+
sgn, base, i = parseint_preamble(T<:Signed, base, s, startpos, endpos)
74+
if i == 0
75+
raise && throw(ArgumentError("premature end of integer: $(repr(SubString(s,startpos,endpos)))"))
76+
return _n
77+
end
78+
# Fetch the first digit character; `i` becomes the index after it (0 if there is none).
c, i = parseint_next(s,i,endpos)
79+
if i == 0
80+
raise && throw(ArgumentError("premature end of integer: $(repr(SubString(s,startpos,endpos)))"))
81+
return _n
82+
end
83+
84+
base = convert(T,base)
85+
# While n <= m, n*base + d (with d < base) cannot exceed typemax(T), so the
# first loop accumulates digits without overflow checks.
m::T = div(typemax(T)-base+1,base)
86+
n::T = 0
87+
while n <= m
88+
d::T = '0' <= c <= '9' ? c-'0' :
89+
'A' <= c <= 'Z' ? c-'A'+10 :
90+
'a' <= c <= 'z' ? c-'a'+a : base
91+
# Non-digit characters were mapped to `base` above, so they are rejected here too.
if d >= base
92+
raise && throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(SubString(s,startpos,endpos)))"))
93+
return _n
94+
end
95+
n *= base
96+
n += d
97+
if i > endpos
98+
n *= sgn
99+
return Nullable{T}(n)
100+
end
101+
c, i = next(s,i)
102+
isspace(c) && break
103+
end
104+
# From here on the sign is carried by `n` itself (and by each digit below).
(T <: Signed) && (n *= sgn)
105+
# Remaining digits: every step goes through the overflow-checked helpers.
while !isspace(c)
106+
d::T = '0' <= c <= '9' ? c-'0' :
107+
'A' <= c <= 'Z' ? c-'A'+10 :
108+
'a' <= c <= 'z' ? c-'a'+a : base
109+
if d >= base
110+
raise && throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(SubString(s,startpos,endpos)))"))
111+
return _n
112+
end
113+
(T <: Signed) && (d *= sgn)
114+
115+
safe_n = safe_mul(n, base)
116+
isnull(safe_n) || (safe_n = safe_add(get(safe_n), d))
117+
if isnull(safe_n)
118+
raise && throw(OverflowError())
119+
return _n
120+
end
121+
n = get(safe_n)
122+
(i > endpos) && return Nullable{T}(n)
123+
c, i = next(s,i)
124+
end
125+
# Only whitespace may follow the number.
while i <= endpos
126+
c, i = next(s,i)
127+
if !isspace(c)
128+
raise && throw(ArgumentError("extra characters after whitespace in $(repr(SubString(s,startpos,endpos)))"))
129+
return _n
130+
end
131+
end
132+
return Nullable{T}(n)
133+
end
134+
# Whole-string convenience method: parse from `start(s)` to `endof(s)`.
function tryparse_internal{T<:Integer}(::Type{T}, s::AbstractString, base::Int, raise::Bool)
    return tryparse_internal(T, s, start(s), endof(s), base, raise)
end

# Range method: pick the value of the digit 'a' from the base (10 for bases up to
# 36, otherwise 36) and delegate to the full parser.
function tryparse_internal{T<:Integer}(::Type{T}, s::AbstractString, startpos::Int, endpos::Int, base::Int, raise::Bool)
    lower_offset = base <= 36 ? 10 : 36
    return tryparse_internal(T, s, startpos, endpos, base, lower_offset, raise)
end
138+
# Parse `s` as a `T` in the given base (2 through 62). Returns a `Nullable{T}` that
# is empty when `s` is not a valid number; an out-of-range base throws.
function tryparse{T<:Integer}(::Type{T}, s::AbstractString, base::Int)
    if !(2 <= base <= 62)
        throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
    end
    return tryparse_internal(T, s, Int(base), false)
end

# Parse `s` as a `T`, passing base 0 so that a base prefix is detected.
function tryparse{T<:Integer}(::Type{T}, s::AbstractString)
    return tryparse_internal(T, s, 0, false)
end
141+
142+
# Parse `s` as a `T` in the given base (2 through 62).
#
# Throws an `ArgumentError` for an out-of-range base; malformed input and
# overflow are reported by `tryparse_internal`, which is called with `raise=true`.
function parse{T<:Integer}(::Type{T}, s::AbstractString, base::Integer)
    (2 <= base <= 62) || throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
    # `tryparse_internal` only has methods taking `base::Int`; after the range
    # check the conversion is exact, so any `Integer` type is accepted here.
    get(tryparse_internal(T, s, Int(base), true))
end
# Parse `s` as a `T`, passing base 0 so that a base prefix is detected.
parse{T<:Integer}(::Type{T}, s::AbstractString) = get(tryparse_internal(T, s, 0, true))
147+
148+
## stringifying integers more efficiently ##
149+
150+
# Signed machine integers are stringified directly via `dec`.
function string(x::Union{Int8,Int16,Int32,Int64,Int128})
    return dec(x)
end
151+
152+
## string to float functions ##
153+
154+
# Float64 from a byte string: the runtime parser receives the string, a zero
# offset and `sizeof(s)`.
function tryparse(::Type{Float64}, s::ByteString)
    return ccall(:jl_try_substrtod, Nullable{Float64}, (Ptr{UInt8},Csize_t,Csize_t), s, 0, sizeof(s))
end

# Float64 from a substring: the parent string is passed together with the
# substring's `offset` and `endof` fields.
function tryparse{T<:ByteString}(::Type{Float64}, s::SubString{T})
    return ccall(:jl_try_substrtod, Nullable{Float64}, (Ptr{UInt8},Csize_t,Csize_t), s.string, s.offset, s.endof)
end

# Float32 counterparts of the two methods above.
function tryparse(::Type{Float32}, s::ByteString)
    return ccall(:jl_try_substrtof, Nullable{Float32}, (Ptr{UInt8},Csize_t,Csize_t), s, 0, sizeof(s))
end

function tryparse{T<:ByteString}(::Type{Float32}, s::SubString{T})
    return ccall(:jl_try_substrtof, Nullable{Float32}, (Ptr{UInt8},Csize_t,Csize_t), s.string, s.offset, s.endof)
end

# Any other string type is converted with `bytestring` first.
function tryparse{T<:Union{Float32,Float64}}(::Type{T}, s::AbstractString)
    return tryparse(T, bytestring(s))
end
161+
162+
# Parse `s` as a floating-point number of type `T`; throws an `ArgumentError`
# when `tryparse` reports that `s` is not a valid number.
function parse{T<:FloatingPoint}(::Type{T}, s::AbstractString)
    result = tryparse(T, s)
    if isnull(result)
        throw(ArgumentError("invalid number format $(repr(s)) for $T"))
    end
    return get(result)
end
166+
167+
# A string converts to a float by parsing it as a Float64.
function float(x::AbstractString)
    return parse(Float64, x)
end

# An array of strings converts element by element into a new array whose element
# type is `typeof(float(0))`.
function float{S<:AbstractString}(a::AbstractArray{S})
    out = similar(a, typeof(float(0)))
    return map!(float, out, a)
end
170+
171+
## interface to parser ##
172+
173+
# Parse one Julia expression from `str`, starting at index `pos`.
#
# Returns `(expr, next_pos)`. When `raise` is true a parse error or empty input
# throws a `ParseError`; otherwise the error is returned as an `Expr(:error, ...)`.
# `greedy` is forwarded to the runtime parser as a 1/0 flag.
function parse(str::AbstractString, pos::Int; greedy::Bool=true, raise::Bool=true)
    bstr = bytestring(str)
    greedy_flag = greedy ? 1 : 0
    ex, pos = ccall(:jl_parse_string, Any,
                    (Ptr{UInt8}, Csize_t, Int32, Int32),
                    bstr, sizeof(bstr), pos-1, greedy_flag)
    if isa(ex, Expr) && ex.head === :error
        raise && throw(ParseError(ex.args[1]))
    elseif ex == ()
        # The runtime parser returns () when there was nothing to parse.
        raise && throw(ParseError("end of input"))
        ex = Expr(:error, "end of input")
    end
    return ex, pos+1 # C is zero-based, Julia is 1-based
end
188+
189+
# Parse the whole of `str` as a single Julia expression.
#
# An error expression from the positional parser is returned unchanged. Input
# left over after the first expression throws a `ParseError` when `raise` is
# true and is otherwise reported as an `Expr(:error, ...)`.
function parse(str::AbstractString; raise::Bool=true)
    expr, nextpos = parse(str, start(str), greedy=true, raise=raise)
    (isa(expr, Expr) && expr.head === :error) && return expr
    done(str, nextpos) && return expr
    raise && throw(ParseError("extra token after end of expression"))
    return Expr(:error, "extra token after end of expression")
end

base/shell.jl

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
# This file is a part of Julia. License is MIT: http://julialang.org/license
2+
3+
## shell-like command parsing ##
4+
5+
# Split the shell-like command string `raw` into words.
#
# Handles single quotes, double quotes and backslash escapes. When `interp` is
# true, `$` (outside single quotes) starts a Julia expression that is parsed with
# `parse(s, j, greedy=false)` and stored escaped (`esc`) as a piece of the word.
#
# Returns `(result, last_parse)`. With `interp == false`, `result` is a vector of
# words, each a vector of pieces. With `interp == true`, `result` is an
# `Expr(:tuple, ...)` holding one `Expr(:tuple, pieces...)` per word.
# `last_parse` is the index range of the last `$` expression (`0:-1` if none).
# Unterminated quotes, a dangling backslash, or a `$` followed by a space or the
# end of the command raise an error.
function shell_parse(raw::AbstractString, interp::Bool)
6+
s = lstrip(raw)
7+
# Strip trailing delimiter characters (those in `_default_delims`, defined
# elsewhere), but keep a final space when it is preceded by a backslash ("\\ ").
8+
r = RevString(s)
9+
i = start(r)
10+
c_old = nothing
11+
while !done(r,i)
12+
c, j = next(r,i)
13+
if c == '\\' && c_old == ' '
14+
i -= 1
15+
break
16+
elseif !(c in _default_delims)
17+
break
18+
end
19+
i = j
20+
c_old = c
21+
end
22+
s = s[1:end-i+1]
23+
24+
# Empty range until a `$` expression has been parsed.
last_parse = 0:-1
25+
isempty(s) && return interp ? (Expr(:tuple,:()),last_parse) : ([],last_parse)
26+
27+
in_single_quotes = false
28+
in_double_quotes = false
29+
30+
# `args` collects finished words; `arg` collects the pieces of the current word.
args::Vector{Any} = []
31+
arg::Vector{Any} = []
32+
# `i` is the start of the pending literal piece, `j` is the scan position.
i = start(s)
33+
j = i
34+
35+
# Add a piece to the current word, skipping empty strings.
function update_arg(x)
36+
if !isa(x,AbstractString) || !isempty(x)
37+
push!(arg, x)
38+
end
39+
end
40+
# Finish the current word (an empty word becomes a single "") and start a new one.
function append_arg()
41+
if isempty(arg); arg = Any["",]; end
42+
push!(args, arg)
43+
arg = []
44+
end
45+
46+
while !done(s,j)
47+
c, k = next(s,j)
48+
if !in_single_quotes && !in_double_quotes && isspace(c)
49+
# Unquoted whitespace ends the current word; skip the rest of the run.
update_arg(s[i:j-1])
50+
append_arg()
51+
j = k
52+
while !done(s,j)
53+
c, k = next(s,j)
54+
if !isspace(c)
55+
i = j
56+
break
57+
end
58+
j = k
59+
end
60+
elseif interp && !in_single_quotes && c == '$'
61+
# Interpolation: flush the literal text, then parse one Julia expression.
update_arg(s[i:j-1]); i = k; j = k
62+
if done(s,k)
63+
error("\$ right before end of command")
64+
end
65+
if isspace(s[k])
66+
error("space not allowed right after \$")
67+
end
68+
stpos = j
69+
ex, j = parse(s,j,greedy=false)
70+
last_parse = stpos:j
71+
update_arg(esc(ex)); i = j
72+
else
73+
if !in_double_quotes && c == '\''
74+
in_single_quotes = !in_single_quotes
75+
update_arg(s[i:j-1]); i = k
76+
elseif !in_single_quotes && c == '"'
77+
in_double_quotes = !in_double_quotes
78+
update_arg(s[i:j-1]); i = k
79+
elseif c == '\\'
80+
if in_double_quotes
81+
if done(s,k)
82+
error("unterminated double quote")
83+
end
84+
# Inside double quotes a backslash only escapes '"' and '$'.
if s[k] == '"' || s[k] == '$'
85+
update_arg(s[i:j-1]); i = k
86+
c, k = next(s,k)
87+
end
88+
elseif !in_single_quotes
89+
if done(s,k)
90+
error("dangling backslash")
91+
end
92+
# Outside quotes the backslash is dropped and the next character kept.
update_arg(s[i:j-1]); i = k
93+
c, k = next(s,k)
94+
end
95+
end
96+
j = k
97+
end
98+
end
99+
100+
if in_single_quotes; error("unterminated single quote"); end
101+
if in_double_quotes; error("unterminated double quote"); end
102+
103+
# Flush the final piece and word.
update_arg(s[i:end])
104+
append_arg()
105+
106+
if !interp
107+
return (args,last_parse)
108+
end
109+
110+
# construct an expression
111+
ex = Expr(:tuple)
112+
for arg in args
113+
push!(ex.args, Expr(:tuple, arg...))
114+
end
115+
(ex,last_parse)
116+
end
117+
# One-argument form: parse with interpolation enabled.
shell_parse(s::AbstractString) = shell_parse(s,true)
118+
119+
# Split `s` into shell words without interpolation; the pieces of each word are
# joined with `string`.
function shell_split(s::AbstractString)
    words = shell_parse(s, false)[1]
    return AbstractString[string(pieces...) for pieces in words]
end
127+
128+
# Print `word` to `io`, quoted as needed.
#
# An empty word prints as ''. A word without whitespace, backslash, quote or `$`
# characters prints unchanged. A word with such characters but no single quote is
# wrapped in single quotes. Otherwise it is wrapped in double quotes, with `"` and
# `$` preceded by a backslash.
function print_shell_word(io::IO, word::AbstractString)
    isempty(word) && print(io, "''")
    has_single = false
    has_special = false
    for ch in word
        if ch == '\''
            has_single = true
            has_special = true
        elseif isspace(ch) || ch == '\\' || ch == '"' || ch == '$'
            has_special = true
        end
    end
    if has_single
        print(io, '"')
        for ch in word
            (ch == '"' || ch == '$') && print(io, '\\')
            print(io, ch)
        end
        print(io, '"')
    elseif has_special
        print(io, '\'', word, '\'')
    else
        print(io, word)
    end
end
157+
158+
# Print `cmd` and each of `args` to `io` as shell words separated by single spaces.
function print_shell_escaped(io::IO, cmd::AbstractString, args::AbstractString...)
    print_shell_word(io, cmd)
    for idx in 1:length(args)
        print(io, ' ')
        print_shell_word(io, args[idx])
    end
    return nothing
end

# With no words there is nothing to print.
function print_shell_escaped(io::IO)
    return nothing
end
166+
167+
# Return the words of `args` as one string, as printed by `print_shell_escaped`.
function shell_escape(args::AbstractString...)
    return sprint(print_shell_escaped, args...)
end

0 commit comments

Comments
 (0)