@@ -35,7 +35,7 @@ string(s::AbstractString) = s
35
35
# Varargs form of `string`: forwards every argument to `print_to_string`.
string (xs... ) = print_to_string (xs... )
36
36
37
37
# `bytestring` methods: the zero-argument form returns a string literal, a byte
# vector is forwarded as (pointer, length), and string arguments are forwarded
# to `print_to_string`.
bytestring () = " "
38
# The removed/added pair below only respells `Array{UInt8,1}` as `Vector{UInt8}`.
- bytestring (s:: Array {UInt8,1 } ) = bytestring (pointer (s),length (s))
38
+ bytestring (s:: Vector {UInt8} ) = bytestring (pointer (s),length (s))
39
39
bytestring (s:: AbstractString... ) = print_to_string (s... )
40
40
41
41
function bytestring (p:: Union(Ptr{UInt8},Ptr{Int8}) )
@@ -49,10 +49,10 @@ function bytestring(p::Union(Ptr{UInt8},Ptr{Int8}),len::Integer)
49
49
ccall (:jl_pchar_to_string , ByteString, (Ptr{UInt8},Int), p, len)
50
50
end
51
51
52
# Conversions from any `AbstractString`:
#   - to a byte array: the `data` field of `bytestring(s)`
#   - to `ByteString`: `bytestring(s)` itself
#   - to a `Char` vector: `collect(s)`
#   - to `Symbol`: `symbol(s)`
# The removed/added pairs only respell `Array{T,1}` as `Vector{T}`.
- convert (:: Type{Array {UInt8,1 }} , s:: AbstractString ) = bytestring (s). data
52
+ convert (:: Type{Vector {UInt8}} , s:: AbstractString ) = bytestring (s). data
53
53
convert (:: Type{Array{UInt8}} , s:: AbstractString ) = bytestring (s). data
54
54
convert (:: Type{ByteString} , s:: AbstractString ) = bytestring (s)
55
- convert (:: Type{Array {Char,1 }} , s:: AbstractString ) = collect (s)
55
+ convert (:: Type{Vector {Char}} , s:: AbstractString ) = collect (s)
56
56
convert (:: Type{Symbol} , s:: AbstractString ) = symbol (s)
57
57
58
58
# # generic supplied functions ##
@@ -301,21 +301,25 @@ function _searchindex(s::Array, t::Array, i)
301
301
0
302
302
end
303
303
304
# `searchindex` entry points. The removed method spelled out the union of
# `UInt8`/`Int8` vectors inline; the added lines name that union `ByteArray`
# and use it in the signature instead.
- searchindex (s:: Union(Array{UInt8,1},Array{Int8,1}) ,t:: Union(Array{UInt8,1},Array{Int8,1}) ,i) = _searchindex (s,t,i)
304
+ typealias ByteArray Union (Vector{UInt8},Vector{Int8})
305
+
306
+ searchindex (s:: ByteArray , t:: ByteArray , i) = _searchindex (s,t,i)
305
307
# Three-argument forms forward to `_searchindex`; two-argument forms supply
# `start(s)` as the starting index.
searchindex (s:: AbstractString , t:: AbstractString , i:: Integer ) = _searchindex (s,t,i)
306
308
searchindex (s:: AbstractString , t:: AbstractString ) = searchindex (s,t,start (s))
307
309
searchindex (s:: AbstractString , c:: Char , i:: Integer ) = _searchindex (s,c,i)
308
310
searchindex (s:: AbstractString , c:: Char ) = searchindex (s,c,start (s))
309
311
310
312
# `searchindex` specialised for byte strings, starting at index `i` (default 1).
# A pattern whose last index is 1 is looked up with a single-character `search`;
# any other pattern is delegated to `searchindex` on the two `data` arrays.
# The change in this hunk replaces the `length(t) == 1` test with `endof(t) == 1`.
function searchindex (s:: ByteString , t:: ByteString , i:: Integer = 1 )
311
- if length (t) == 1
313
+ # Check for fast case of a single byte
314
+ # (for multi-byte UTF-8 sequences, use searchindex on byte arrays instead)
315
+ if endof (t) == 1
312
316
search (s, t[1 ], i)
313
317
else
314
318
searchindex (s. data, t. data, i)
315
319
end
316
320
end
317
321
318
- function search (s:: Union(Array{UInt8,1},Array{Int8,1}) , t:: Union(Array{UInt8,1},Array{Int8,1}) , i)
322
+ function search (s:: ByteArray , t:: ByteArray , i)
319
323
idx = searchindex (s,t,i)
320
324
if isempty (t)
321
325
idx: idx- 1
@@ -333,7 +337,13 @@ function search(s::AbstractString, t::AbstractString, i::Integer=start(s))
333
337
end
334
338
end
335
339
336
- function rsearch (s:: AbstractString , c:: Chars , i:: Integer = endof (s))
340
# Two-argument `rsearch`: runs `search` over `RevString(s)` without a start
# index, returns 0 when that search reports 0, and otherwise maps the hit `j`
# back to an index of `s` as `endof(s)-j+1`.
+ function rsearch (s:: AbstractString , c:: Chars )
341
+ j = search (RevString (s), c)
342
+ j == 0 && return 0
343
+ endof (s)- j+ 1
344
+ end
345
+
346
+ function rsearch (s:: AbstractString , c:: Chars , i:: Integer )
337
347
e = endof (s)
338
348
j = search (RevString (s), c, e- i+ 1 )
339
349
j == 0 && return 0
@@ -435,27 +445,37 @@ function _rsearchindex(s::Array, t::Array, k)
435
445
0
436
446
end
437
447
438
# `rsearchindex` entry points. The byte-array method now uses the `ByteArray`
# alias in place of the inline union; both three-argument forms forward to
# `_rsearchindex`.
- rsearchindex (s:: Union(Array{UInt8,1},Array{Int8,1}) , t:: Union(Array{UInt8,1},Array{Int8,1}) ,i) = _rsearchindex (s,t,i)
448
+ rsearchindex (s:: ByteArray , t:: ByteArray ,i) = _rsearchindex (s,t,i)
439
449
rsearchindex (s:: AbstractString , t:: AbstractString , i:: Integer ) = _rsearchindex (s,t,i)
440
450
# Two-argument form: returns 1 when both strings are empty, otherwise starts
# the three-argument search at `endof(s)`.
rsearchindex (s:: AbstractString , t:: AbstractString ) = (isempty (s) && isempty (t)) ? 1 : rsearchindex (s,t,endof (s))
441
451
442
452
# Two-argument `rsearchindex` for byte strings. A pattern whose last index is 1
# uses the single-character `rsearch`; every other pattern goes to
# `_rsearchindex` on the `data` arrays with `length(s.data)` as the start.
# This hunk swaps `length(t)` for `endof(t)` and calls `_rsearchindex` directly.
function rsearchindex (s:: ByteString , t:: ByteString )
443
- if length (t) == 1
453
+ # Check for fast case of a single byte
454
+ # (multi-byte UTF-8 sequences fall through to _rsearchindex on the byte arrays)
455
+ if endof (t) == 1
444
456
rsearch (s, t[1 ])
445
457
else
446
- rsearchindex (s. data, t. data, length (s. data))
458
+ _rsearchindex (s. data, t. data, length (s. data))
447
459
end
448
460
end
449
461
450
462
# Three-argument `rsearchindex` for byte strings; the branch taken depends on `t`:
#   - `endof(t) == 1`: single-character `rsearch` from `i`
#   - `endof(t)` neither 1 nor 0: `_rsearchindex` on the `data` arrays
#   - `endof(t) == 0`: 0 when `i > sizeof(s)`, 1 when `i == 0`, otherwise `i`
# The removed version forwarded every non-single-byte pattern to
# `rsearchindex(s.data, t.data, i)`.
function rsearchindex (s:: ByteString , t:: ByteString , i:: Integer )
451
- if length (t) == 1
463
+ # Check for fast case of a single byte
464
+ # (multi-byte UTF-8 sequences fall through to _rsearchindex on the byte arrays)
465
+ if endof (t) == 1
452
466
rsearch (s, t[1 ], i)
467
+ elseif endof (t) != 0
468
# NOTE(review): `nextind(s, i)-1` presumably moves the limit to the last byte
# of the character at `i` — confirm against `_rsearchindex`.
+ _rsearchindex (s. data, t. data, nextind (s, i)- 1 )
469
+ elseif i > sizeof (s)
470
+ return 0
471
+ elseif i == 0
472
+ return 1
453
473
else
454
- rsearchindex (s . data, t . data, i)
474
+ return i
455
475
end
456
476
end
457
477
458
- function rsearch (s:: Union(Array{UInt8,1},Array{Int8,1}) , t:: Union(Array{UInt8,1},Array{Int8,1}) ,i )
478
+ function rsearch (s:: ByteArray , t:: ByteArray , i :: Integer )
459
479
idx = rsearchindex (s,t,i)
460
480
if isempty (t)
461
481
idx: idx- 1
@@ -536,7 +556,7 @@ cmp(a::Symbol, b::Symbol) = Int(sign(ccall(:strcmp, Int32, (Cstring, Cstring), a
536
556
# Symbol ordering: `a` sorts before `b` when `cmp(a,b)` is negative.
isless (a:: Symbol , b:: Symbol ) = cmp (a,b) < 0
537
557
538
558
# Byte-string prefix test: forwards to the `startswith` method on the two `data` arrays.
startswith (a:: ByteString , b:: ByteString ) = startswith (a. data, b. data)
539
# Byte-wise prefix test: true when `a` is at least as long as `b` and the first
# `length(b)` bytes of both vectors are identical.
# `memcmp` is used instead of `strncmp` because `strncmp` stops comparing at the
# first NUL byte, so vectors that differ only after an embedded 0x00 would be
# reported as matching.
startswith(a::Vector{UInt8}, b::Vector{UInt8}) =
    (length(a) >= length(b) &&
     ccall(:memcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}, UInt), a, b, length(b)) == 0)
541
561
542
562
# TODO : fast endswith
@@ -957,15 +977,15 @@ unescape_string(s::AbstractString) = sprint(endof(s), print_unescaped, s)
957
977
958
978
# # checking UTF-8 & ASCII validity ##
959
979
960
# Classify raw bytes by passing the data and its length to the C routine
# `u8_isvalid`; the `ByteString` method forwards its `data` field. The meaning
# of the returned codes is listed after the methods.
- byte_string_classify (data:: Array {UInt8,1 } ) =
980
+ byte_string_classify (data:: Vector {UInt8} ) =
961
981
ccall (:u8_isvalid , Int32, (Ptr{UInt8}, Int), data, length (data))
962
982
byte_string_classify (s:: ByteString ) = byte_string_classify (s. data)
963
983
# 0: neither valid ASCII nor UTF-8
964
984
# 1: valid ASCII
965
985
# 2: valid UTF-8
966
986
967
# `isvalid` interprets the classification code: ASCII requires exactly 1,
# UTF-8 accepts any nonzero code.
- isvalid (:: Type{ASCIIString} , s:: Union(Array {UInt8,1 },ByteString) ) = byte_string_classify (s) == 1
968
- isvalid (:: Type{UTF8String} , s:: Union(Array {UInt8,1 },ByteString) ) = byte_string_classify (s) != 0
987
+ isvalid (:: Type{ASCIIString} , s:: Union(Vector {UInt8},ByteString) ) = byte_string_classify (s) == 1
988
+ isvalid (:: Type{UTF8String} , s:: Union(Vector {UInt8},ByteString) ) = byte_string_classify (s) != 0
969
989
970
990
# # multiline strings ##
971
991
@@ -1631,8 +1651,6 @@ float{S<:AbstractString}(a::AbstractArray{S}) = map!(float, similar(a,typeof(flo
1631
1651
1632
1652
# find the index of the first occurrence of a value in a byte array
1633
1653
1634
- typealias ByteArray Union (Array{UInt8,1 },Array{Int8,1 })
1635
-
1636
1654
function search (a:: ByteArray , b:: Union(Int8,UInt8) , i:: Integer )
1637
1655
if i < 1
1638
1656
throw (BoundsError (a, i))
@@ -1645,6 +1663,13 @@ function search(a::ByteArray, b::Union(Int8,UInt8), i::Integer)
1645
1663
q = ccall (:memchr , Ptr{UInt8}, (Ptr{UInt8}, Int32, Csize_t), p+ i- 1 , b, n- i+ 1 )
1646
1664
q == C_NULL ? 0 : Int (q- p+ 1 )
1647
1665
end
1666
# Search a `UInt8` vector for a character starting at `i`. An ASCII character
# is searched as the single byte `UInt8(b)`; any other character is converted
# with `string(b).data`, searched as a byte sequence, and the `start` of that
# result is returned.
# NOTE(review): the body mirrors the `ByteArray` method that follows —
# presumably added for dispatch reasons; confirm.
+ function search (a:: Vector{UInt8} , b:: Char , i:: Integer )
1667
+ if isascii (b)
1668
+ search (a,UInt8 (b),i)
1669
+ else
1670
+ search (a,string (b). data,i). start
1671
+ end
1672
+ end
1648
1673
function search (a:: ByteArray , b:: Char , i:: Integer )
1649
1674
if isascii (b)
1650
1675
search (a,UInt8 (b),i)
@@ -1654,7 +1679,7 @@ function search(a::ByteArray, b::Char, i::Integer)
1654
1679
end
1655
1680
# Two-argument convenience form: starts the search at index 1.
search (a:: ByteArray , b:: Union(Int8,UInt8,Char) ) = search (a,b,1 )
1656
1681
1657
- function rsearch (a:: Union(Array{UInt8,1},Array{Int8,1}) , b:: Union(Int8,UInt8) , i:: Integer )
1682
+ function rsearch (a:: ByteArray , b:: Union(Int8,UInt8) , i:: Integer )
1658
1683
if i < 1
1659
1684
return i == 0 ? 0 : throw (BoundsError (a, i))
1660
1685
end
@@ -1697,7 +1722,7 @@ function hex2bytes(s::ASCIIString)
1697
1722
return arr
1698
1723
end
1699
1724
1700
# Hex-encode a byte vector: each element is formatted with `hex(i, 2)` and the
# pieces are joined into one string. The removed/added pair only respells
# `Array{T,1}` as `Vector{T}`.
- bytes2hex {T<:UInt8} (arr:: Array{T,1 } ) = join ([hex (i,2 ) for i in arr])
1725
+ bytes2hex {T<:UInt8} (arr:: Vector{T } ) = join ([hex (i,2 ) for i in arr])
1701
1726
1702
1727
function repr (x)
1703
1728
s = IOBuffer ()
0 commit comments