Merge pull request #975 from JuliaAI/insample-evaluations
Add `InSample` resampling strategy
ablaom authored May 6, 2024
2 parents f811dc3 + 2c85c30 commit d6b1930
Showing 3 changed files with 62 additions and 2 deletions.
2 changes: 1 addition & 1 deletion src/MLJBase.jl
@@ -291,7 +291,7 @@ export machines, sources, Stack,
export TransformedTargetModel

# resampling.jl:
-export ResamplingStrategy, Holdout, CV, StratifiedCV, TimeSeriesCV,
+export ResamplingStrategy, InSample, Holdout, CV, StratifiedCV, TimeSeriesCV,
evaluate!, Resampler, PerformanceEvaluation, CompactPerformanceEvaluation

# `MLJType` and the abstract `Model` subtypes are exported from within
46 changes: 45 additions & 1 deletion src/resampling.jl
@@ -110,6 +110,50 @@ function shuffle_and_rng(shuffle, rng)
return shuffle, rng
end

# ----------------------------------------------------------------
# InSample

"""
in_sample = InSample()
Instantiate an `InSample` resampling strategy, for use in `evaluate!`, `evaluate` and in
tuning. In this strategy the train and test sets are the same, and consist of all
observations specified by the `rows` keyword argument. If `rows` is not specified, all
supplied rows are used.
# Example
```julia
using MLJBase, MLJModels
X, y = make_blobs() # a table and a vector
model = ConstantClassifier()
train, test = partition(eachindex(y), 0.7) # train:test = 70:30
```
Compute in-sample (training) loss:
```julia
evaluate(model, X, y, resampling=InSample(), rows=train, measure=brier_loss)
```
Compute the out-of-sample loss:
```julia
evaluate(model, X, y, resampling=[(train, test),], measure=brier_loss)
```
Or equivalently:
```julia
evaluate(model, X, y, resampling=Holdout(fraction_train=0.7), measure=brier_loss)
```
"""
struct InSample <: ResamplingStrategy end

train_test_pairs(::InSample, rows) = [(rows, rows),]
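
As the one-line `train_test_pairs` method above suggests, `InSample` simply reuses the supplied rows as both train and test set. The sketch below is not part of the diff; it assumes MLJBase and MLJModels are loaded, as in the docstring example, and the names `pairs`, `e1` and `e2` are illustrative only.

```julia
using MLJBase, MLJModels

X, y = make_blobs()                      # a table and a vector
model = ConstantClassifier()
train, _ = partition(eachindex(y), 0.7)  # keep 70% of the rows

# `InSample()` expands to a single (train, train) pair:
pairs = MLJBase.train_test_pairs(InSample(), train)
@assert pairs == [(train, train),]

# ... so in-sample evaluation agrees with passing the explicit pair:
e1 = evaluate(model, X, y, resampling=InSample(), rows=train, measure=brier_loss)
e2 = evaluate(model, X, y, resampling=[(train, train),], measure=brier_loss)
@assert e1.measurement[1] ≈ e2.measurement[1]
```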

# ----------------------------------------------------------------
# Holdout

@@ -118,7 +162,7 @@ end
shuffle=nothing,
rng=nothing)
-Holdout resampling strategy, for use in `evaluate!`, `evaluate` and in
+Instantiate a `Holdout` resampling strategy, for use in `evaluate!`, `evaluate` and in
tuning.
train_test_pairs(holdout, rows)
16 changes: 16 additions & 0 deletions test/resampling.jl
@@ -364,6 +364,22 @@ end
end
end

@testset "insample" begin
rows = rand(Int, 100)
@test MLJBase.train_test_pairs(InSample(), rows) == [(rows, rows),]

X, y = make_regression(20)
model = Models.DeterministicConstantRegressor()

# all rows:
e = evaluate(model, X, y, resampling=InSample(), measure=rms)
@test e.measurement[1] ≈ std(y, corrected=false)

# subsample of rows:
e = evaluate(model, X, y, resampling=InSample(), measure=rms, rows=1:7)
@test e.measurement[1] ≈ std(y[1:7], corrected=false)
end
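
The expected values in the assertions above follow because `DeterministicConstantRegressor` predicts the training-target mean, so the in-sample RMS error is exactly the uncorrected standard deviation of the targets. Below is a short sketch of that identity, not part of the commit, using plain Statistics and the illustrative name `yhat`.

```julia
using Statistics

y = randn(20)
yhat = fill(mean(y), length(y))  # constant prediction: the target mean

# root-mean-square error of the constant prediction over the same data ...
rms_insample = sqrt(mean((yhat .- y).^2))

# ... equals the uncorrected (population) standard deviation of y:
@assert rms_insample ≈ std(y, corrected=false)
```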

@testset_accelerated "holdout" accel begin
x1 = ones(4)
x2 = ones(4)
