From 6f102279fc350f2bd0ff7f4b135d9bb1f005d23c Mon Sep 17 00:00:00 2001 From: John Rush Date: Sun, 13 Apr 2025 11:19:40 -0700 Subject: [PATCH] fix backward feature selection for fixed_steps --- ISLP/models/strategy.py | 17 ++++++++------- tests/models/test_selection.py | 40 +++++++++++++++++++++++++++++++++- 2 files changed, 48 insertions(+), 9 deletions(-) diff --git a/ISLP/models/strategy.py b/ISLP/models/strategy.py index f237db3..950278f 100644 --- a/ISLP/models/strategy.py +++ b/ISLP/models/strategy.py @@ -403,7 +403,7 @@ def first_peak(model_spec, initial_terms_.append(term) initial_state = tuple(initial_terms_) else: - initial_state = () + initial_state = () if direction == 'forward' else list(model_spec.terms) if not parsimonious: _postprocess = _postprocess_best @@ -455,27 +455,28 @@ def fixed_steps(model_spec, """ + n_terms = n_steps if direction == 'forward' else len(list(model_spec.terms)) - n_steps step = Stepwise(model_spec, direction=direction, - min_terms=n_steps, - max_terms=n_steps, + min_terms=n_terms, + max_terms=n_terms, lower_terms=lower_terms, upper_terms=upper_terms, validator=validator) # pick an initial state - if initial_terms is not None: + if initial_terms is not None and initial_terms != []: initial_terms_ = [] + mm_terms = list(model_spec.terms) for term in initial_terms: - mm_terms = list(model_spec.terms) if term in mm_terms: idx = mm_terms.index(term) term = model_spec.terms_[idx] initial_terms_.append(term) initial_state = tuple(initial_terms_) else: - initial_state = () + initial_state = () if direction == 'forward' else list(model_spec.terms_) if not step.lower_terms.issubset(initial_state): raise ValueError('initial_state should contain %s' % str(step.lower_terms)) @@ -486,8 +487,8 @@ def fixed_steps(model_spec, return Strategy(initial_state, step.candidate_states, model_spec.build_submodel, - partial(fixed_steps, n_steps), - partial(_postprocess_fixed_steps, n_steps)) + partial(fixed_steps, n_terms), + partial(_postprocess_fixed_steps, n_terms)) def min_max(model_spec, diff --git a/tests/models/test_selection.py b/tests/models/test_selection.py index 0688307..5bcd4f8 100644 --- a/tests/models/test_selection.py +++ b/tests/models/test_selection.py @@ -113,7 +113,45 @@ def test_step(): print(step_selector.results_) print(step_selector.selected_state_) print('huh') - + +def test_fixed_steps_backward(): + + n, p = 100, 7 + rng = np.random.default_rng(1) + X = rng.standard_normal((n, p)) + Y = rng.standard_normal(n) + D = pd.DataFrame(X, columns=['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'][:p]) + D['A'] = pd.Categorical(rng.choice(range(5), (n,), replace=True)) + + model_spec = MS(list(D.columns)) + model_spec.fit(D) + + num_steps = 4 + strategy = Stepwise.fixed_steps(model_spec, + num_steps, + direction='backward') + + step_selector = FeatureSelector(LinearRegression(), + strategy, + cv=3) + step_selector.fit(D, Y) + + print("selected", [term.name for term in step_selector.selected_state_]) + assert len(step_selector.selected_state_) == (len(model_spec.terms) - num_steps) + + num_steps = 2 + strategy = Stepwise.fixed_steps(model_spec, + num_steps, + direction='backward') + + step_selector = FeatureSelector(LinearRegression(), + strategy, + cv=None) + step_selector.fit(D, Y) + + print("selected", [term.name for term in step_selector.selected_state_]) + assert len(step_selector.selected_state_) == (len(model_spec.terms) - num_steps) + def test_constraint(): rng = np.random.default_rng(3)