Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clean up search and find API #24673

Merged
merged 4 commits into from
Jan 4, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -900,6 +900,14 @@ Deprecated or removed
in favor of dot overloading (`getproperty`) so factors should now be accessed as e.g.
`F.Q` instead of `F[:Q]` ([#25184]).

* `search` and `rsearch` have been deprecated in favor of `findfirst`/`findnext` and
`findlast`/`findprev` respectively, in combination with the new `equalto` and `occursin`
predicates for some methods ([#24673]).

* `ismatch(regex, str)` has been deprecated in favor of `contains(str, regex)` ([#24673]).

* `findin(a, b)` has been deprecated in favor of `find(occursin(b), a)` ([#24673]).

Command-line option changes
---------------------------

Expand Down
6 changes: 3 additions & 3 deletions base/abstractarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1055,7 +1055,7 @@ get(A::AbstractArray, I::Dims, default) = checkbounds(Bool, A, I...) ? A[I...] :

function get!(X::AbstractVector{T}, A::AbstractVector, I::Union{AbstractRange,AbstractVector{Int}}, default::T) where T
# 1d is not linear indexing
ind = findin(I, indices1(A))
ind = find(occursin(indices1(A)), I)
X[ind] = A[I[ind]]
Xind = indices1(X)
X[first(Xind):first(ind)-1] = default
Expand All @@ -1064,7 +1064,7 @@ function get!(X::AbstractVector{T}, A::AbstractVector, I::Union{AbstractRange,Ab
end
function get!(X::AbstractArray{T}, A::AbstractArray, I::Union{AbstractRange,AbstractVector{Int}}, default::T) where T
# Linear indexing
ind = findin(I, 1:length(A))
ind = find(occursin(1:length(A)), I)
X[ind] = A[I[ind]]
X[1:first(ind)-1] = default
X[last(ind)+1:length(X)] = default
Expand Down Expand Up @@ -1237,7 +1237,7 @@ _cs(d, a, b) = (a == b ? a : throw(DimensionMismatch(
"mismatch in dimension $d (expected $a got $b)")))

dims2cat(::Val{n}) where {n} = ntuple(i -> (i == n), Val(n))
dims2cat(dims) = ntuple(i -> (i in dims), maximum(dims))
dims2cat(dims) = ntuple(occursin(dims), maximum(dims))

cat(dims, X...) = cat_t(dims, promote_eltypeof(X...), X...)

Expand Down
47 changes: 11 additions & 36 deletions base/array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1799,6 +1799,7 @@ end

# A scalar is treated as a one-element collection: the result is `[1]` when the
# scalar satisfies the condition and an empty index vector otherwise.
find(x::Bool) = x ? [1] : Vector{Int}()
find(testf::Function, x::Number) = testf(x) ? [1] : Vector{Int}()
# `[1]` when `x` is an element of the wrapped collection `p.x`. The branches
# were previously inverted, so a match produced an empty result.
find(p::OccursIn, x::Number) = x in p.x ? [1] : Vector{Int}()

"""
findnz(A)
Expand Down Expand Up @@ -2008,7 +2009,7 @@ function _findin(a, b)
ind
end

# If two collections are already sorted, findin can be computed with
# If two collections are already sorted, _findin can be computed with
# a single traversal of the two collections. This is much faster than
# using a hash table (although it has the same complexity).
function _sortedfindin(v, w)
Expand Down Expand Up @@ -2050,42 +2051,16 @@ function _sortedfindin(v, w)
return out
end

"""
findin(a, b)

Return the indices of elements in collection `a` that appear in collection `b`.

# Examples
```jldoctest
julia> a = collect(1:3:15)
5-element Array{Int64,1}:
1
4
7
10
13

julia> b = collect(2:4:10)
3-element Array{Int64,1}:
2
6
10

julia> findin(a,b) # 10 is the only common element
1-element Array{Int64,1}:
4
```
"""
function findin(a::Array{<:Real}, b::Union{Array{<:Real},Real})
if issorted(a, Sort.Forward) && issorted(b, Sort.Forward)
return _sortedfindin(a, b)
function find(pred::OccursIn{<:Union{Array{<:Real},Real}}, x::Array{<:Real})
if issorted(x, Sort.Forward) && issorted(pred.x, Sort.Forward)
return _sortedfindin(x, pred.x)
else
return _findin(a, b)
return _findin(x, pred.x)
end
end
# issorted fails for some element types, so the method above has to be restricted
# to element types with isless/< defined.
findin(a, b) = _findin(a, b)
find(pred::OccursIn, x::Union{AbstractArray, Tuple}) = _findin(x, pred.x)

# Copying subregions
function indcopy(sz::Dims, I::Vector)
Expand All @@ -2094,8 +2069,8 @@ function indcopy(sz::Dims, I::Vector)
for i = n+1:length(sz)
s *= sz[i]
end
dst = eltype(I)[findin(I[i], i < n ? (1:sz[i]) : (1:s)) for i = 1:n]
src = eltype(I)[I[i][findin(I[i], i < n ? (1:sz[i]) : (1:s))] for i = 1:n]
dst = eltype(I)[_findin(I[i], i < n ? (1:sz[i]) : (1:s)) for i = 1:n]
src = eltype(I)[I[i][_findin(I[i], i < n ? (1:sz[i]) : (1:s))] for i = 1:n]
dst, src
end

Expand All @@ -2105,8 +2080,8 @@ function indcopy(sz::Dims, I::Tuple{Vararg{RangeIndex}})
for i = n+1:length(sz)
s *= sz[i]
end
dst::typeof(I) = ntuple(i-> findin(I[i], i < n ? (1:sz[i]) : (1:s)), n)::typeof(I)
src::typeof(I) = ntuple(i-> I[i][findin(I[i], i < n ? (1:sz[i]) : (1:s))], n)::typeof(I)
dst::typeof(I) = ntuple(i-> _findin(I[i], i < n ? (1:sz[i]) : (1:s)), n)::typeof(I)
src::typeof(I) = ntuple(i-> I[i][_findin(I[i], i < n ? (1:sz[i]) : (1:s))], n)::typeof(I)
dst, src
end

Expand Down
53 changes: 51 additions & 2 deletions base/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3806,16 +3806,65 @@ end
@deprecate getq(F::Factorization) F.Q
end

# issue #5290
@deprecate lexcmp(x::AbstractArray, y::AbstractArray) cmp(x, y)
@deprecate lexcmp(x::Real, y::Real) cmp(isless, x, y)
@deprecate lexcmp(x::Complex, y::Complex) cmp((real(x),imag(x)), (real(y),imag(y)))
@deprecate lexcmp(x, y) cmp(x, y)

@deprecate lexless isless

# END 0.7 deprecations
@deprecate search(str::Union{String,SubString}, re::Regex, idx::Integer) findnext(re, str, idx)
@deprecate search(s::AbstractString, r::Regex, idx::Integer) findnext(r, s, idx)
@deprecate search(s::AbstractString, r::Regex) findfirst(r, s)
@deprecate search(s::AbstractString, c::Char, i::Integer) findnext(equalto(c), s, i)
@deprecate search(s::AbstractString, c::Char) findfirst(equalto(c), s)
@deprecate search(a::ByteArray, b::Union{Int8,UInt8}, i::Integer) findnext(equalto(b), a, i)
@deprecate search(a::ByteArray, b::Union{Int8,UInt8}) findfirst(equalto(b), a)
@deprecate search(a::String, b::Union{Int8,UInt8}, i::Integer) findnext(equalto(b), unsafe_wrap(Vector{UInt8}, a), i)
@deprecate search(a::String, b::Union{Int8,UInt8}) findfirst(equalto(b), unsafe_wrap(Vector{UInt8}, a))
@deprecate search(a::ByteArray, b::Char, i::Integer) findnext(equalto(UInt8(b)), a, i)
@deprecate search(a::ByteArray, b::Char) findfirst(equalto(UInt8(b)), a)

@deprecate search(s::AbstractString, c::Union{Tuple{Vararg{Char}},AbstractVector{Char},Set{Char}}, i::Integer) findnext(occursin(c), s, i)
@deprecate search(s::AbstractString, c::Union{Tuple{Vararg{Char}},AbstractVector{Char},Set{Char}}) findfirst(occursin(c), s)
@deprecate search(s::AbstractString, t::AbstractString, i::Integer) findnext(t, s, i)
@deprecate search(s::AbstractString, t::AbstractString) findfirst(t, s)

@deprecate search(buf::IOBuffer, delim::UInt8) findfirst(equalto(delim), buf)
@deprecate search(buf::Base.GenericIOBuffer, delim::UInt8) findfirst(equalto(delim), buf)

@deprecate rsearch(s::AbstractString, c::Union{Tuple{Vararg{Char}},AbstractVector{Char},Set{Char}}, i::Integer) findprev(occursin(c), s, i)
@deprecate rsearch(s::AbstractString, c::Union{Tuple{Vararg{Char}},AbstractVector{Char},Set{Char}}) findlast(occursin(c), s)
@deprecate rsearch(s::AbstractString, t::AbstractString, i::Integer) findprev(t, s, i)
@deprecate rsearch(s::AbstractString, t::AbstractString) findlast(t, s)
@deprecate rsearch(s::ByteArray, t::ByteArray, i::Integer) findprev(t, s, i)
@deprecate rsearch(s::ByteArray, t::ByteArray) findlast(t, s)

@deprecate rsearch(str::Union{String,SubString}, re::Regex, idx::Integer) findprev(re, str, idx)
@deprecate rsearch(str::Union{String,SubString}, re::Regex) findlast(re, str)
@deprecate rsearch(s::AbstractString, r::Regex, idx::Integer) findprev(r, s, idx)
@deprecate rsearch(s::AbstractString, r::Regex) findlast(r, s)
@deprecate rsearch(s::AbstractString, c::Char, i::Integer) findprev(equalto(c), s, i)
@deprecate rsearch(s::AbstractString, c::Char) findlast(equalto(c), s)
@deprecate rsearch(a::Union{String,ByteArray}, b::Union{Int8,UInt8}, i::Integer = endof(a)) findprev(equalto(b), a, i)
@deprecate rsearch(a::String, b::Union{Int8,UInt8}, i::Integer = endof(a)) findprev(equalto(Char(b)), a, i)
@deprecate rsearch(a::ByteArray, b::Char, i::Integer = endof(a)) findprev(equalto(UInt8(b)), a, i)

@deprecate searchindex(s::AbstractString, t::AbstractString) first(findfirst(t, s))
@deprecate searchindex(s::AbstractString, t::AbstractString, i::Integer) first(findnext(t, s, i))
@deprecate rsearchindex(s::AbstractString, t::AbstractString) first(findlast(t, s))
@deprecate rsearchindex(s::AbstractString, t::AbstractString, i::Integer) first(findprev(t, s, i))

@deprecate searchindex(s::AbstractString, c::Char) first(findfirst(equalto(c), s))
@deprecate searchindex(s::AbstractString, c::Char, i::Integer) first(findnext(equalto(c), s, i))
@deprecate rsearchindex(s::AbstractString, c::Char) first(findlast(equalto(c), s))
@deprecate rsearchindex(s::AbstractString, c::Char, i::Integer) first(findprev(equalto(c), s, i))

@deprecate ismatch(r::Regex, s::AbstractString) contains(s, r)

@deprecate findin(a, b) find(occursin(b), a)

# END 0.7 deprecations
# BEGIN 1.0 deprecations

# END 1.0 deprecations
4 changes: 2 additions & 2 deletions base/docs/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,7 @@ const builtins = ["abstract type", "baremodule", "begin", "break",

moduleusings(mod) = ccall(:jl_module_usings, Any, (Any,), mod)

filtervalid(names) = filter(x->!ismatch(r"#", x), map(string, names))
filtervalid(names) = filter(x->!contains(x, r"#"), map(string, names))

accessible(mod::Module) =
[filter!(s -> !Base.isdeprecated(mod, s), names(mod, true, true));
Expand All @@ -373,7 +373,7 @@ completions(name::Symbol) = completions(string(name))
# Searching and apropos

# Docsearch simply returns true or false if an object contains the given needle
docsearch(haystack::AbstractString, needle) = !isempty(search(haystack, needle))
docsearch(haystack::AbstractString, needle) = !isempty(findfirst(needle, haystack))
docsearch(haystack::Symbol, needle) = docsearch(string(haystack), needle)
docsearch(::Nothing, needle) = false
function docsearch(haystack::Array, needle)
Expand Down
51 changes: 24 additions & 27 deletions base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -409,18 +409,6 @@ export
extrema,
fill!,
fill,
find,
findfirst,
findlast,
findin,
findmax,
findmin,
findmin!,
findmax!,
findn,
findnext,
findprev,
findnz,
first,
flipdim,
hcat,
Expand Down Expand Up @@ -476,9 +464,6 @@ export
rot180,
rotl90,
rotr90,
searchsorted,
searchsortedfirst,
searchsortedlast,
shuffle,
shuffle!,
size,
Expand All @@ -501,6 +486,30 @@ export
view,
zeros,

# search, find, match and related functions
contains,
eachmatch,
endswith,
equalto,
find,
findfirst,
findlast,
findmax,
findmin,
findmin!,
findmax!,
findn,
findnext,
findprev,
findnz,
occursin,
match,
matchall,
searchsorted,
searchsortedfirst,
searchsortedlast,
startswith,

# linear algebra
bkfact!,
bkfact,
Expand Down Expand Up @@ -611,7 +620,6 @@ export
any!,
any,
collect,
contains,
count,
delete!,
deleteat!,
Expand Down Expand Up @@ -679,7 +687,6 @@ export
# strings and text output
ascii,
base,
startswith,
bin,
bitstring,
bytes2hex,
Expand All @@ -691,22 +698,17 @@ export
digits,
digits!,
dump,
eachmatch,
endswith,
escape_string,
hex,
hex2bytes,
hex2bytes!,
info,
isascii,
ismatch,
isvalid,
join,
logging,
lpad,
lstrip,
match,
matchall,
ncodeunits,
ndigits,
nextind,
Expand All @@ -723,12 +725,8 @@ export
repr,
reverseind,
rpad,
rsearch,
rsearchindex,
rsplit,
rstrip,
search,
searchindex,
show,
showcompact,
showerror,
Expand Down Expand Up @@ -800,7 +798,6 @@ export
identity,
isbits,
isequal,
equalto,
isimmutable,
isless,
ifelse,
Expand Down
6 changes: 3 additions & 3 deletions base/interactiveutil.jl
Original file line number Diff line number Diff line change
Expand Up @@ -334,13 +334,13 @@ function versioninfo(io::IO=STDOUT; verbose::Bool=false, packages::Bool=false)

println(io, "Environment:")
for (k,v) in ENV
if ismatch(r"JULIA", String(k))
if contains(String(k), r"JULIA")
println(io, " $(k) = $(v)")
end
end
if verbose
for (k,v) in ENV
if ismatch(r"PATH|FLAG|^TERM$|HOME", String(k))
if contains(String(k), r"PATH|FLAG|^TERM$|HOME")
println(io, " $(k) = $(v)")
end
end
Expand Down Expand Up @@ -737,7 +737,7 @@ function varinfo(m::Module=Main, pattern::Regex=r"")
(value ∈ (Base, Main, Core) ? "" : format_bytes(summarysize(value))),
summary(value)]
end
for v in sort!(names(m)) if isdefined(m, v) && ismatch(pattern, string(v)) ]
for v in sort!(names(m)) if isdefined(m, v) && contains(string(v), pattern) ]

pushfirst!(rows, Any["name", "size", "summary"])

Expand Down
8 changes: 4 additions & 4 deletions base/iobuffer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -426,18 +426,18 @@ read(io::GenericIOBuffer) = read!(io,StringVector(nb_available(io)))
readavailable(io::GenericIOBuffer) = read(io)
read(io::GenericIOBuffer, nb::Integer) = read!(io,StringVector(min(nb, nb_available(io))))

function search(buf::IOBuffer, delim::UInt8)
function findfirst(delim::EqualTo{UInt8}, buf::IOBuffer)
p = pointer(buf.data, buf.ptr)
q = @gc_preserve buf ccall(:memchr,Ptr{UInt8},(Ptr{UInt8},Int32,Csize_t),p,delim,nb_available(buf))
q = @gc_preserve buf ccall(:memchr,Ptr{UInt8},(Ptr{UInt8},Int32,Csize_t),p,delim.x,nb_available(buf))
nb::Int = (q == C_NULL ? 0 : q-p+1)
return nb
end

function search(buf::GenericIOBuffer, delim::UInt8)
function findfirst(delim::EqualTo{UInt8}, buf::GenericIOBuffer)
data = buf.data
for i = buf.ptr : buf.size
@inbounds b = data[i]
if b == delim
if b == delim.x
return i - buf.ptr + 1
end
end
Expand Down
2 changes: 1 addition & 1 deletion base/libc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ function strptime(fmt::AbstractString, timestr::AbstractString)
@static if Sys.isapple()
# if we didn't explicitly parse the weekday or year day, use mktime
# to fill them in automatically.
if !ismatch(r"([^%]|^)%(a|A|j|w|Ow)", fmt)
if !contains(fmt, r"([^%]|^)%(a|A|j|w|Ow)")
ccall(:mktime, Int, (Ref{TmStruct},), tm)
end
end
Expand Down
Loading