Skip to content

Commit

Permalink
Merge branch 'docstring-patch-1' of https://github.com/abhro/MLJBase.jl
Browse files Browse the repository at this point in the history
… into abhro-docstring-patch-1
  • Loading branch information
ablaom committed May 6, 2024
2 parents d6b1930 + dbba742 commit e0ca155
Show file tree
Hide file tree
Showing 13 changed files with 238 additions and 222 deletions.
18 changes: 9 additions & 9 deletions src/composition/learning_networks/nodes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -409,14 +409,14 @@ of nodes, sources and other arguments.
### Examples
```
X = source(π)
W = @node sin(X)
```julia-repl
julia> X = source(π)
julia> W = @node sin(X)
julia> W()
0
X = source(1:10)
Y = @node selectrows(X, 3:4)
julia> X = source(1:10)
julia> Y = @node selectrows(X, 3:4)
julia> Y()
3:4
Expand All @@ -425,10 +425,10 @@ julia> Y(["one", "two", "three", "four"])
"three"
"four"
X1 = source(4)
X2 = source(5)
add(a, b, c) = a + b + c
N = @node add(X1, 1, X2)
julia> X1 = source(4)
julia> X2 = source(5)
julia> add(a, b, c) = a + b + c
julia> N = @node add(X1, 1, X2)
julia> N()
10
Expand Down
18 changes: 9 additions & 9 deletions src/composition/learning_networks/signatures.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
**Private method.**
Return a dictionary of machines, keyed on model, for the all machines in the completed
learning network for which `node` is the greatest lower bound. Only machines bound to
symbolic models are included. Values are always vectors, even if they contain only a
single machine.
Return a dictionary of machines, keyed on model, for all the machines in the
completed learning network for which `node` is the greatest lower bound. Only
machines bound to symbolic models are included. Values are always vectors,
even if they contain only a single machine.
"""
function machines_given_model(node::AbstractNode)
Expand All @@ -35,14 +35,14 @@ attempt_scalarize(v) = length(v) == 1 ? v[1] : v
**Private method.**
Given a dictionary of machine vectors, keyed on model names (symbols), broadcast `f` over
each vector, and make the result, in the returned named tuple, the value associated with
the corresponding model name as key.
Given a dictionary of machine vectors, keyed on model names (symbols), broadcast
`f` over each vector, and make the result, in the returned named tuple, the
value associated with the corresponding model name as key.
Singleton vector values are scalarized, unless `scalarize = false`.
If a value in the computed named tuple is `nothing`, or a vector of `nothing`s, then the
entry is dropped from the tuple, unless `drop_nothings=false`.
If a value in the computed named tuple is `nothing`, or a vector of `nothing`s,
then the entry is dropped from the tuple, unless `drop_nothings=false`.
"""
function tuple_keyed_on_model(f, machines_given_model; scalarize=true, drop_nothings=true)
Expand Down
17 changes: 8 additions & 9 deletions src/composition/models/stacking.jl
Original file line number Diff line number Diff line change
Expand Up @@ -337,12 +337,12 @@ internal_stack_report(
) = NamedTuple{}()

"""
internal_stack_report(
m::Stack,
verbosity::Int,
y::AbstractNode,
folds_evaluations::Vararg{AbstractNode},
)
internal_stack_report(
m::Stack,
verbosity::Int,
y::AbstractNode,
folds_evaluations::Vararg{AbstractNode},
)
When measure/measures is provided, the folds_evaluation will have been filled by
`store_for_evaluation`. This function is not doing any heavy work (not constructing nodes
Expand Down Expand Up @@ -518,7 +518,7 @@ function oos_set(m::Stack{modelnames}, Xs::Source, ys::Source, tt_pairs) where m
end

#######################################
################# Prefit #################
################# Prefit ##############
#######################################

function prefit(m::Stack{modelnames}, verbosity::Int, X, y) where modelnames
Expand Down Expand Up @@ -564,8 +564,7 @@ const DOC_STACK =
Stack(; metalearner=nothing, name1=model1, name2=model2, ..., keyword_options...)
Implements the two-layer generalized stack algorithm introduced by
[Wolpert
(1992)](https://www.sciencedirect.com/science/article/abs/pii/S0893608005800231)
[Wolpert (1992)](https://www.sciencedirect.com/science/article/abs/pii/S0893608005800231)
and generalized by [Van der Laan et al
(2007)](https://biostats.bepress.com/ucbbiostat/paper222/). Returns an
instance of type `ProbabilisticStack` or `DeterministicStack`,
Expand Down
6 changes: 3 additions & 3 deletions src/composition/models/transformed_target_model.jl
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ const ERR_MODEL_UNSPECIFIED = ArgumentError(
"Expecting atomic model as argument. None specified. "
)
const ERR_TRANSFORMER_UNSPECIFIED = ArgumentError(
"You must specify `transformer=...`. ."
"You must specify `transformer=...`. ."
)
const ERR_TOO_MANY_ARGUMENTS = ArgumentError(
"At most one non-keyword argument, a model, allowed. "
Expand Down Expand Up @@ -123,7 +123,7 @@ y -> mode.(y))`.
A model that normalizes the target before applying ridge regression,
with predictions returned on the original scale:
```
```julia
@load RidgeRegressor pkg=MLJLinearModels
model = RidgeRegressor()
tmodel = TransformedTargetModel(model, transformer=Standardizer())
Expand All @@ -132,7 +132,7 @@ tmodel = TransformedTargetModel(model, transformer=Standardizer())
A model that applies a static `log` transformation to the data, again
returning predictions to the original scale:
```
```julia
tmodel2 = TransformedTargetModel(model, transformer=y->log.(y), inverse=z->exp.(z))
```
Expand Down
47 changes: 29 additions & 18 deletions src/data/data.jl
Original file line number Diff line number Diff line change
Expand Up @@ -104,23 +104,28 @@ corresponding `fractions` of `length(nrows(X))`, where valid fractions
are floats between 0 and 1 whose sum is less than one. The last
fraction is not provided, as it is inferred from the preceding ones.
For "synchronized" partitioning of multiple objects, use the
`multi=true` option described below.
For synchronized partitioning of multiple objects, use the
`multi=true` option.
julia> partition(1:1000, 0.8)
([1,...,800], [801,...,1000])
```julia-repl
julia> partition(1:1000, 0.8)
([1,...,800], [801,...,1000])
julia> partition(1:1000, 0.2, 0.7)
([1,...,200], [201,...,900], [901,...,1000])
julia> partition(1:1000, 0.2, 0.7)
([1,...,200], [201,...,900], [901,...,1000])
julia> partition(reshape(1:10, 5, 2), 0.2, 0.4)
([1 6], [2 7; 3 8], [4 9; 5 10])
julia> partition(reshape(1:10, 5, 2), 0.2, 0.4)
([1 6], [2 7; 3 8], [4 9; 5 10])
X, y = make_blobs() # a table and vector
Xtrain, Xtest = partition(X, 0.8, stratify=y)
julia> X, y = make_blobs() # a table and vector
julia> Xtrain, Xtest = partition(X, 0.8, stratify=y)
```
(Xtrain, Xtest), (ytrain, ytest) = partition((X, y), 0.8, rng=123, multi=true)
Here's an example of synchronized partitioning of multiple objects:
```julia-repl
julia> (Xtrain, Xtest), (ytrain, ytest) = partition((X, y), 0.8, rng=123, multi=true)
```
## Keywords
Expand Down Expand Up @@ -209,7 +214,7 @@ Returns a tuple of tables/vectors with length one greater than the
number of supplied predicates, with the last component including all
previously unselected columns.
```
```julia-repl
julia> table = DataFrame(x=[1,2], y=['a', 'b'], z=[10.0, 20.0], w=["A", "B"])
2×4 DataFrame
Row │ x y z w
Expand Down Expand Up @@ -300,9 +305,11 @@ The method is curried, so that `restrict(folds, i)` is the operator
on data defined by `restrict(folds, i)(X) = restrict(X, folds, i)`.
### Example
folds = ([1, 2], [3, 4, 5], [6,])
restrict([:x1, :x2, :x3, :x4, :x5, :x6], folds, 2) # [:x3, :x4, :x5]
#
```julia
folds = ([1, 2], [3, 4, 5], [6,])
restrict([:x1, :x2, :x3, :x4, :x5, :x6], folds, 2) # [:x3, :x4, :x5]
```
See also [`corestrict`](@ref)
Expand All @@ -322,7 +329,9 @@ all elements of `folds`. Here `folds` is a vector or tuple of integer
vectors, typically representing row indices of a vector, matrix or
table.
complement(([1,2], [3,], [4, 5]), 2) # [1 ,2, 4, 5]
```julia
complement(([1,2], [3,], [4, 5]), 2) # [1, 2, 4, 5]
```
"""
complement(f, i) = reduce(vcat, collect(f)[Not(i)])
Expand All @@ -345,8 +354,10 @@ on data defined by `corestrict(folds, i)(X) = corestrict(X, folds, i)`.
### Example
folds = ([1, 2], [3, 4, 5], [6,])
corestrict([:x1, :x2, :x3, :x4, :x5, :x6], folds, 2) # [:x1, :x2, :x6]
```julia
folds = ([1, 2], [3, 4, 5], [6,])
corestrict([:x1, :x2, :x3, :x4, :x5, :x6], folds, 2) # [:x1, :x2, :x6]
```
"""
corestrict(f::NTuple{N}, i) where N = FoldComplementRestrictor{i,N}(f)
Expand Down
2 changes: 1 addition & 1 deletion src/data/datasets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ const COERCE_SUNSPOTS = (
(:sunspot_number=>Continuous),)

"""
load_dataset(fpath, coercions)
load_dataset(fpath, coercions)
Load one of the standard datasets (such as Boston), assuming the file is a
comma-separated file with a header.
Expand Down
61 changes: 23 additions & 38 deletions src/data/datasets_synthetic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,6 @@ const EXTRA_CLASSIFICATION =
Internal function to finalize the `make_*` functions.
"""
x = [1 2 3 ; 4 5 6]
x
length(size(collect(1:3))) # (
function finalize_Xy(X, y, shuffle, as_table, eltype, rng; clf::Bool=true)
# Shuffle the rows if required
if shuffle
Expand Down Expand Up @@ -78,7 +75,7 @@ By default, a table `X` with `p` columns (features) and `n` rows
### Example
```
```julia
X, y = make_blobs(100, 3; centers=2, cluster_std=[1.0, 3.0])
```
Expand All @@ -95,8 +92,7 @@ function make_blobs(n::Integer=100,

# check arguments make sense
if n < 1 || p < 1
throw(ArgumentError(
"Expected `n` and `p` to be at least 1."))
throw(ArgumentError("Expected `n` and `p` to be at least 1."))
end
if center_box.first >= center_box.second
throw(ArgumentError(
Expand Down Expand Up @@ -181,7 +177,7 @@ $(EXTRA_KW_MAKE*EXTRA_CLASSIFICATION)
### Example
```
```julia
X, y = make_circles(100; noise=0.5, factor=0.3)
```
Expand All @@ -196,12 +192,10 @@ function make_circles(n::Integer=100;

# check arguments make sense
if n < 1
throw(ArgumentError(
"Expected `n` to be at least 1."))
throw(ArgumentError("Expected `n` to be at least 1."))
end
if noise < 0
throw(ArgumentError(
"Noise argument cannot be negative."))
throw(ArgumentError("Noise argument cannot be negative."))
end
if !(0 < factor < 1)
throw(ArgumentError(
Expand All @@ -224,12 +218,12 @@ function make_circles(n::Integer=100;
X .+= noise .* randn(rng, n, 2)
end

return finalize_Xy(X, y, shuffle, as_table, eltype, rng)
return finalize_Xy(X, y, shuffle, as_table, eltype, rng)
end


"""
make_moons(n::Int=100; kwargs...)
make_moons(n::Int=100; kwargs...)
Generates labeled two-dimensional points lying close to two
interleaved semi-circles, for use with classification and clustering
Expand Down Expand Up @@ -257,7 +251,7 @@ membership to the left or right semi-circle.
### Example
```
```julia
X, y = make_moons(100; noise=0.5)
```
Expand All @@ -273,12 +267,10 @@ function make_moons(n::Int=150;

# check arguments make sense
if n < 1
throw(ArgumentError(
"Expected `n` to be at least 1."))
throw(ArgumentError("Expected `n` to be at least 1."))
end
if noise < 0
throw(ArgumentError(
"Noise argument cannot be negative."))
throw(ArgumentError("Noise argument cannot be negative."))
end

rng = init_rng(rng)
Expand Down Expand Up @@ -324,8 +316,7 @@ end
Make portion `s` of vector `θ` exactly 0.
"""
sparsify!(rng, θ, s) =
(θ .*= (rand(rng, length(θ)) .< s))
sparsify!(rng, θ, s) = (θ .*= (rand(rng, length(θ)) .< s))

"""Add outliers to portion s of vector."""
outlify!(rng, y, s) =
Expand All @@ -338,19 +329,18 @@ const SIGMOID_32 = log(Float32(1)/eps(Float32) - Float32(1))
sigmoid(x)
Return the sigmoid computed in a numerically stable way:
``σ(x) = 1/(1+exp(-x))``
"""
function sigmoid(x::Float64)
x > SIGMOID_64 && return one(x)
x < -SIGMOID_64 && return zero(x)
return one(x) / (one(x) + exp(-x))
x > SIGMOID_64 && return one(x)
x < -SIGMOID_64 && return zero(x)
return one(x) / (one(x) + exp(-x))
end
function sigmoid(x::Float32)
x > SIGMOID_32 && return one(x)
x < -SIGMOID_32 && return zero(x)
return one(x) / (one(x) + exp(-x))
x > SIGMOID_32 && return one(x)
x < -SIGMOID_32 && return zero(x)
return one(x) / (one(x) + exp(-x))
end
sigmoid(x) = sigmoid(float(x))

Expand Down Expand Up @@ -392,7 +382,7 @@ $EXTRA_KW_MAKE
### Example
```
```julia
X, y = make_regression(100, 5; noise=0.5, sparse=0.2, outliers=0.1)
```
Expand All @@ -411,24 +401,19 @@ function make_regression(n::Int=100,

# check arguments make sense
if n < 1 || p < 1
throw(ArgumentError(
"Expected `n` and `p` to be at least 1."))
throw(ArgumentError("Expected `n` and `p` to be at least 1."))
end
if n_targets < 1
throw(ArgumentError(
"Expected `n_targets` to be at least 1."))
throw(ArgumentError("Expected `n_targets` to be at least 1."))
end
if !(0 <= sparse < 1)
throw(ArgumentError(
"Sparsity argument must be in [0, 1)."))
throw(ArgumentError("Sparsity argument must be in [0, 1)."))
end
if noise < 0
throw(ArgumentError(
"Noise argument cannot be negative."))
throw(ArgumentError("Noise argument cannot be negative."))
end
if !(0 <= outliers <= 1)
throw(ArgumentError(
"Outliers argument must be in [0, 1]."))
throw(ArgumentError("Outliers argument must be in [0, 1]."))
end

rng = init_rng(rng)
Expand Down
Loading

0 comments on commit e0ca155

Please sign in to comment.