Skip to content

Commit 92a25f0

Browse files
committed
Reorganize base/string.jl, base/utf*, test/strings.jl, test/unicode.jl
The monolithic string.jl has been split up into several files, and the test files in strings.jl and unicode.jl have been made to correspond with the files of the same names in base. This will prevent a lot of manual merging that was previously necessary.
1 parent 1823f8e commit 92a25f0

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+3875
-3796
lines changed

base/RepStrings.jl

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# This file is a part of Julia. License is MIT: http://julialang.org/license
2+
3+
## efficient representation of repeated strings ##
4+
5+
# Lazy repetition of a string: represents `string` repeated `repeat` times
# by storing only the base string and the repetition count.
immutable RepString <: AbstractString
6+
# the base string being repeated
string::AbstractString
7+
# number of repetitions
repeat::Integer
8+
end
9+
10+
# Last valid index of the repeated string.
#
# One repetition occupies `next(s.string, e)[2] - 1` index positions, where `e`
# is the last valid index of the base string; the final repetition ends at
# offset `e` within itself.
#
# An empty base string has `e == 0`, for which `next(s.string, e)` would throw
# a BoundsError; the repeated string is empty as well, so return 0 directly.
function endof(s::RepString)
    e = endof(s.string)
    e == 0 && return 0
    span = next(s.string, e)[2] - 1
    return span * (s.repeat - 1) + e
end
14+
# Character count: the base string's length multiplied by the repeat count.
function length(s::RepString)
    return length(s.string) * s.repeat
end

# Byte size: the base string's size multiplied by the repeat count.
function sizeof(s::RepString)
    return sizeof(s.string) * s.repeat
end
16+
17+
# Iteration step: return the character at index `i` and the index that follows.
# Throws BoundsError(s, i) when `i` is below 1, falls in a repetition at or
# beyond `s.repeat`, or maps past the last valid index of the base string.
function next(s::RepString, i::Int)
    i < 1 && throw(BoundsError(s, i))

    base_end = endof(s.string)
    span = next(s.string, base_end)[2] - 1    # index width of one repetition

    # rep: zero-based repetition number; pos: index inside the base string
    rep, off = divrem(i - 1, span)
    pos = off + 1

    (rep >= s.repeat || pos > base_end) && throw(BoundsError(s, i))

    ch, following = next(s.string, pos)
    # shift the base-string successor index back into the caller's frame
    return ch, following - pos + i
end
34+
35+
# Repeat `s` `r` times. A negative count is an ArgumentError; zero gives "",
# one gives `s` itself, and anything larger gives a lazy RepString.
function repeat(s::AbstractString, r::Integer)
    if r < 0
        throw(ArgumentError("can't repeat a string $r times"))
    elseif r == 0
        return ""
    elseif r == 1
        return s
    end
    return RepString(s, r)
end
41+
42+
# Converting a string to RepString wraps it with a repeat count of one.
function convert(::Type{RepString}, s::AbstractString)
    return RepString(s, 1)
end
43+
44+
# Eager repetition for byte strings: copy the underlying bytes `r` times into
# a fresh buffer and convert it back to the type of `s`.
# Throws ArgumentError for a negative count.
function repeat(s::ByteString, r::Integer)
    if r < 0
        throw(ArgumentError("can't repeat a string $r times"))
    end
    bytes = s.data
    nbytes = length(bytes)
    buf = Array(UInt8, nbytes*r)
    for k = 0:r-1
        # the k-th copy (zero-based) starts right after k full copies
        copy!(buf, k*nbytes + 1, bytes, 1, nbytes)
    end
    return convert(typeof(s), buf)
end

base/RevStrings.jl

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# This file is a part of Julia. License is MIT: http://julialang.org/license
2+
3+
## reversed strings without data movement ##
4+
5+
# View of a string in reverse order; wraps the original string and stores
# no data of its own.
immutable RevString{T<:AbstractString} <: AbstractString
6+
# the wrapped (un-reversed) string
string::T
7+
end
8+
9+
# endof, length and sizeof of a reversed string all forward to the wrapped string.
function endof(s::RevString)
    return endof(s.string)
end

function length(s::RevString)
    return length(s.string)
end

function sizeof(s::RevString)
    return sizeof(s.string)
end
12+
13+
# Iteration step for a reversed string: map `i` to the mirrored index in the
# wrapped string, read the character there, and derive the following index
# from the wrapped string's previous index.
function next(s::RevString, i::Int)
    last_idx = endof(s)
    src = last_idx - i + 1
    ch = s.string[src]
    following = last_idx - prevind(s.string, src) + 1
    return (ch, following)
end
17+
18+
# Reversing a generic string wraps it in a RevString.
function reverse(s::AbstractString)
    return RevString(s)
end

# Reversing a RevString unwraps it, giving back the original string.
function reverse(s::RevString)
    return s.string
end

# A reversed ASCIIString is always ASCII.
function isascii(s::RevString{ASCIIString})
    return true
end
22+
23+
## reverse an index i so that reverse(s)[i] == s[reverseind(s,i)]
24+
25+
# Directly indexed strings (and substrings of them): mirror around the length.
function reverseind(s::Union{DirectIndexString,SubString{DirectIndexString}}, i::Integer)
    return length(s) + 1 - i
end

# RevString: mirror around endof.
function reverseind(s::RevString, i::Integer)
    return endof(s) - i + 1
end

# Index just before the one that follows the last valid index.
function lastidx(s::AbstractString)
    return nextind(s, endof(s)) - 1
end

function lastidx(s::DirectIndexString)
    return length(s)
end

# SubString: translate into the parent's index space, reverse there, and
# translate back by removing the offset.
function reverseind(s::SubString, i::Integer)
    parent_idx = lastidx(s.string) - s.offset - s.endof + i
    return reverseind(s.string, parent_idx) - s.offset
end

base/RopeStrings.jl

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# This file is a part of Julia. License is MIT: http://julialang.org/license
2+
3+
## ropes for efficient concatenation, etc. ##
4+
5+
# Binary concatenation node: the string is `head` followed by `tail`.
# `depth` records the nesting depth of the node and `endof` caches the sum of
# the endof of both halves. The inner constructors rotate nodes when one side
# is deeper than the other, as decided by the strdepth comparisons below.
immutable RopeString <: AbstractString
    head::AbstractString
    tail::AbstractString
    depth::Int32
    endof::Int

    function RopeString(h::RopeString, t::RopeString)
        if strdepth(h.tail) + strdepth(t) < strdepth(h.head)
            return RopeString(h.head, RopeString(h.tail, t))
        end
        return new(h, t, max(h.depth,t.depth)+1, endof(h)+endof(t))
    end

    function RopeString(h::RopeString, t::AbstractString)
        if strdepth(h.tail) < strdepth(h.head)
            return RopeString(h.head, RopeString(h.tail, t))
        end
        return new(h, t, h.depth+1, endof(h)+endof(t))
    end

    function RopeString(h::AbstractString, t::RopeString)
        if strdepth(t.head) < strdepth(t.tail)
            return RopeString(RopeString(h, t.head), t.tail)
        end
        return new(h, t, t.depth+1, endof(h)+endof(t))
    end

    # two plain strings: a leaf-level node of depth 1
    function RopeString(h::AbstractString, t::AbstractString)
        return new(h, t, 1, endof(h)+endof(t))
    end
end
29+
# A single string becomes a rope whose tail is the empty string.
function RopeString(s::AbstractString)
    return RopeString(s, "")
end

# Depth used by the RopeString constructors: 0 for ordinary strings,
# the stored depth for ropes.
function strdepth(s::AbstractString)
    return 0
end

function strdepth(s::RopeString)
    return s.depth
end
33+
34+
# Iteration step: indices up to endof(head) are served by the head; later
# indices are shifted into the tail and the successor index is shifted back.
function next(s::RopeString, i::Int)
    head_end = endof(s.head)
    i <= head_end && return next(s.head, i)
    ch, following = next(s.tail, i - head_end)
    return ch, following + head_end
end
43+
44+
# endof is cached in the node at construction time.
function endof(s::RopeString)
    return s.endof
end

# Character count is the sum over both halves.
function length(s::RopeString)
    return length(s.head) + length(s.tail)
end

# Write the head, then the tail; the result is that of the second write.
function write(io::IO, s::RopeString)
    write(io, s.head)
    return write(io, s.tail)
end

# Byte size is the sum over both halves.
function sizeof(s::RopeString)
    return sizeof(s.head) + sizeof(s.tail)
end

base/SubStrings.jl

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
# This file is a part of Julia. License is MIT: http://julialang.org/license
2+
3+
## substrings reference original strings ##
4+
5+
# View into a parent string. Index `k` of the substring corresponds to index
# `k + offset` of `string`; `endof` is the last valid index of the view.
immutable SubString{T<:AbstractString} <: AbstractString
    string::T
    offset::Int
    endof::Int

    # Build the view of s[i:j].
    # Throws ArgumentError when `i` is within range but not a valid index of `s`.
    function SubString(s::T, i::Int, j::Int)
        # start beyond the end of `s`, or end before start: empty view
        if i > endof(s) || j<i
            return new(s, i-1, 0)
        end

        isvalid(s,i) || throw(ArgumentError("invalid SubString index"))

        # move `j` down until it is a valid index of `s` (never below `i`)
        while !isvalid(s,j) && j > i
            j -= 1
        end

        shift = i-1
        return new(s, shift, max(0, j-shift))
    end
end
27+
# Outer constructor: infer the type parameter from the parent string.
function SubString{T<:AbstractString}(s::T, i::Int, j::Int)
    return SubString{T}(s, i, j)
end

# Substring of a substring: re-base both indices onto the original parent.
function SubString(s::SubString, i::Int, j::Int)
    base = s.offset
    return SubString(s.string, base+i, base+j)
end

# Accept any Integer indices by converting them to Int.
function SubString(s::AbstractString, i::Integer, j::Integer)
    return SubString(s, Int(i), Int(j))
end

# Omitted end index: run to the end of `s`.
function SubString(s::AbstractString, i::Integer)
    return SubString(s, i, endof(s))
end
31+
32+
# For an ASCIIString parent the byte size is the stored endof.
function sizeof(s::SubString{ASCIIString})
    return s.endof
end

# For a UTF8String parent the size runs up to (but excluding) the index that
# follows the last valid index; an empty view has size 0.
function sizeof(s::SubString{UTF8String})
    if s.endof == 0
        return 0
    end
    return nextind(s, s.endof) - 1
end
34+
35+
# TODO: length(s::SubString) = ??
36+
# default implementation will work but it's slow
37+
# can this be delegated efficiently somehow?
38+
# that may require additional string interfaces
39+
# Directly indexed parents: the length is the last index.
function length{T<:DirectIndexString}(s::SubString{T})
    return endof(s)
end

# UTF8String parents: 0 for an empty view, otherwise the result of the
# `u8_charnum` C routine over the view's bytes.
function length(s::SubString{UTF8String})
    if s.endof == 0
        return 0
    end
    nbytes = nextind(s, s.endof) - 1
    return Int(ccall(:u8_charnum, Csize_t, (Ptr{UInt8}, Csize_t),
                     pointer(s), nbytes))
end
45+
46+
# Iteration step: bounds-check against the view, delegate to the parent at the
# shifted index, and shift the successor index back into the view's frame.
function next(s::SubString, i::Int)
    (i < 1 || i > s.endof) && throw(BoundsError(s, i))
    ch, parent_next = next(s.string, i+s.offset)
    return ch, parent_next-s.offset
end
53+
54+
# Character at index `i` of the view; throws BoundsError outside 1:s.endof.
function getindex(s::SubString, i::Int)
    (i < 1 || i > s.endof) && throw(BoundsError(s, i))
    return getindex(s.string, i+s.offset)
end
60+
61+
function endof(s::SubString)
    return s.endof
end

# An index is valid when it lies within the view and the corresponding parent
# index is valid.
function isvalid(s::SubString, i::Integer)
    if !(start(s) <= i <= endof(s))
        return false
    end
    return isvalid(s.string, s.offset+i)
end

# Directly indexed parents: only the range check applies.
function isvalid{T<:DirectIndexString}(s::SubString{T}, i::Integer)
    return start(s) <= i <= endof(s)
end
68+
69+
# For directly indexed parents, index and character position coincide; both
# conversions just bounds-check and hand `i` back.
function ind2chr{T<:DirectIndexString}(s::SubString{T}, i::Integer)
    checkbounds(s,i)
    return i
end

function chr2ind{T<:DirectIndexString}(s::SubString{T}, i::Integer)
    checkbounds(s,i)
    return i
end
71+
72+
# Index navigation is delegated to the parent in its own index space and the
# result is shifted back by the offset.
function nextind(s::SubString, i::Integer)
    return nextind(s.string, i+s.offset) - s.offset
end

function prevind(s::SubString, i::Integer)
    return prevind(s.string, i+s.offset) - s.offset
end
74+
75+
# Converting a string to a SubString of its own type yields a view over all of it.
function convert{T<:AbstractString}(::Type{SubString{T}}, s::T)
    return SubString(s, 1, endof(s))
end
76+
77+
# Copy the bytes covered by the view into a new byte string.
#
# An empty view (endof == 0) is handled up front, as sizeof and length do for
# UTF8String views. Without the guard, an offset that sits on the lead byte of
# a multi-byte character makes nextind(p, 0) jump to the next character
# boundary, so the computed byte range would be non-empty and would start in
# the middle of a character.
function bytestring{T <: ByteString}(p::SubString{T})
    p.endof == 0 && return bytestring(UInt8[])
    first_byte = 1 + p.offset
    last_byte = p.offset + nextind(p, p.endof) - 1
    return bytestring(p.string.data[first_byte:last_byte])
end
78+
79+
# Range indexing returns a SubString view rather than a copy.
# Throws BoundsError when the range starts below 1 or ends past endof(s).
function getindex(s::AbstractString, r::UnitRange{Int})
    lo = first(r)
    hi = last(r)
    (lo < 1 || endof(s) < hi) && throw(BoundsError(s, r))
    return SubString(s, lo, hi)
end
85+
86+
# A view into an ASCIIString is always ASCII.
function isascii(s::SubString{ASCIIString})
    return true
end
87+
88+
# Three-way comparison of two byte-string views: memcmp over the shared prefix
# decides first; on a tie the byte sizes are compared.
function cmp{T<:ByteString,S<:ByteString}(a::SubString{T}, b::SubString{S})
    len_a = sizeof(a)
    len_b = sizeof(b)
    order = ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt),
                  pointer(a), pointer(b), min(len_a,len_b))
    if order < 0
        return -1
    elseif order > 0
        return +1
    end
    return cmp(len_a,len_b)
end

base/hashing2.jl

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,3 +166,15 @@ end
166166
## hashing Float16s ##
167167

168168
# Float16 values are hashed through their Float64 conversion.
function hash(x::Float16, h::UInt)
    return hash(Float64(x), h)
end
169+
170+
## hashing strings ##
171+
172+
# Symbol of the C hashing routine passed to ccall below, chosen by whether
# UInt is UInt64.
const memhash = UInt === UInt64 ? :memhash_seed : :memhash32_seed
173+
# Constant mixed into the incoming hash value before hashing string bytes;
# a wider literal is used when UInt is UInt64.
const memhash_seed = UInt === UInt64 ? 0x71e729fd56419c81 : 0x56419c81
174+
175+
# Hash a byte string (or a view into one): add the string seed to `h`, hash
# the raw bytes with the C routine named by `memhash`, and add the seeded
# value to the result.
function hash{T<:ByteString}(s::Union{T,SubString{T}}, h::UInt)
    seeded = h + memhash_seed
    # note: use pointer(s) here (see #6058).
    digest = ccall(memhash, UInt, (Ptr{UInt8}, Csize_t, UInt32),
                   pointer(s), sizeof(s), seeded % UInt32)
    return digest + seeded
end
180+
# Generic strings are hashed via their byte-string conversion.
function hash(s::AbstractString, h::UInt)
    return hash(bytestring(s), h)
end

0 commit comments

Comments
 (0)