-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Co-authored-by: Kleynhans, Bernard <[email protected]> Co-authored-by: Bernard Kleynhans <[email protected]>
- Loading branch information
1 parent
2e7fd9b
commit 4688606
Showing
7 changed files
with
134 additions
and
122 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,4 +2,4 @@ | |
# Copyright FMR LLC <[email protected]> | ||
# SPDX-License-Identifier: GNU GPLv3 | ||
|
||
__version__ = "1.1.0" | ||
__version__ = "1.1.1" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,6 @@ | ||
catboost | ||
joblib | ||
lightgbm | ||
minepy | ||
numpy | ||
pandas | ||
scikit-learn | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,112 +2,115 @@ | |
# Copyright FMR LLC <[email protected]> | ||
# SPDX-License-Identifier: GNU GPLv3 | ||
|
||
from sklearn.datasets import load_boston, load_iris | ||
from feature.utils import get_data_label | ||
from feature.selector import Selective, SelectionMethod | ||
# from sklearn.datasets import load_boston, load_iris | ||
# from feature.utils import get_data_label | ||
# from feature.selector import Selective, SelectionMethod | ||
from tests.test_base import BaseTest | ||
|
||
|
||
class TestMaximalInfo(BaseTest): | ||
|
||
def test_maximal_regress_top_k(self): | ||
data, label = get_data_label(load_boston()) | ||
data = data.drop(columns=["CHAS", "NOX", "RM", "DIS", "RAD", "TAX", "PTRATIO", "INDUS"]) | ||
|
||
method = SelectionMethod.Statistical(num_features=3, method="maximal_info") | ||
selector = Selective(method) | ||
selector.fit(data, label) | ||
subset = selector.transform(data) | ||
|
||
# Reduced columns | ||
self.assertEqual(subset.shape[1], 3) | ||
self.assertListEqual(list(subset.columns), ['CRIM', 'AGE', 'LSTAT']) | ||
|
||
def test_maximal_regress_top_percentile(self): | ||
data, label = get_data_label(load_boston()) | ||
data = data.drop(columns=["CHAS", "NOX", "RM", "DIS", "RAD", "TAX", "PTRATIO", "INDUS"]) | ||
|
||
method = SelectionMethod.Statistical(num_features=0.6, method="maximal_info") | ||
selector = Selective(method) | ||
selector.fit(data, label) | ||
subset = selector.transform(data) | ||
|
||
# Reduced columns | ||
self.assertEqual(subset.shape[1], 3) | ||
self.assertListEqual(list(subset.columns), ['CRIM', 'AGE', 'LSTAT']) | ||
|
||
def test_maximal_regress_top_k_all(self): | ||
data, label = get_data_label(load_boston()) | ||
data = data.drop(columns=["CHAS", "NOX", "RM", "DIS", "RAD", "TAX", "PTRATIO", "INDUS"]) | ||
|
||
method = SelectionMethod.Statistical(num_features=5, method="maximal_info") | ||
selector = Selective(method) | ||
selector.fit(data, label) | ||
subset = selector.transform(data) | ||
|
||
# Reduced columns | ||
self.assertEqual(data.shape[1], subset.shape[1]) | ||
self.assertListEqual(list(data.columns), list(subset.columns)) | ||
|
||
def test_maximal_regress_top_percentile_all(self): | ||
data, label = get_data_label(load_boston()) | ||
data = data.drop(columns=["CHAS", "NOX", "RM", "DIS", "RAD", "TAX", "PTRATIO", "INDUS"]) | ||
|
||
method = SelectionMethod.Statistical(num_features=1.0, method="maximal_info") | ||
selector = Selective(method) | ||
selector.fit(data, label) | ||
subset = selector.transform(data) | ||
|
||
# Reduced columns | ||
self.assertEqual(data.shape[1], subset.shape[1]) | ||
self.assertListEqual(list(data.columns), list(subset.columns)) | ||
|
||
def test_maximal_classif_top_k(self): | ||
data, label = get_data_label(load_iris()) | ||
|
||
method = SelectionMethod.Statistical(num_features=2, method="maximal_info") | ||
selector = Selective(method) | ||
selector.fit(data, label) | ||
subset = selector.transform(data) | ||
|
||
# Reduced columns | ||
self.assertEqual(subset.shape[1], 2) | ||
self.assertListEqual(list(subset.columns), ['petal length (cm)', 'petal width (cm)']) | ||
|
||
def test_maximal_classif_top_percentile(self): | ||
data, label = get_data_label(load_iris()) | ||
|
||
method = SelectionMethod.Statistical(num_features=0.5, method="maximal_info") | ||
selector = Selective(method) | ||
selector.fit(data, label) | ||
subset = selector.transform(data) | ||
|
||
# Reduced columns | ||
self.assertEqual(subset.shape[1], 2) | ||
self.assertListEqual(list(subset.columns), ['petal length (cm)', 'petal width (cm)']) | ||
|
||
def test_maximal_classif_top_percentile_all(self): | ||
data, label = get_data_label(load_iris()) | ||
|
||
method = SelectionMethod.Statistical(num_features=1.0, method="maximal_info") | ||
selector = Selective(method) | ||
selector.fit(data, label) | ||
subset = selector.transform(data) | ||
|
||
# Reduced columns | ||
self.assertEqual(subset.shape[1], 4) | ||
self.assertListEqual(list(subset.columns), | ||
['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']) | ||
|
||
def test_maximal_classif_top_k_all(self): | ||
data, label = get_data_label(load_iris()) | ||
|
||
method = SelectionMethod.Statistical(num_features=4, method="maximal_info") | ||
selector = Selective(method) | ||
selector.fit(data, label) | ||
subset = selector.transform(data) | ||
|
||
# Reduced columns | ||
self.assertEqual(subset.shape[1], 4) | ||
self.assertListEqual(list(subset.columns), | ||
['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']) | ||
def test_maximal(self): | ||
pass | ||
|
||
# def test_maximal_regress_top_k(self): | ||
# data, label = get_data_label(load_boston()) | ||
# data = data.drop(columns=["CHAS", "NOX", "RM", "DIS", "RAD", "TAX", "PTRATIO", "INDUS"]) | ||
# | ||
# method = SelectionMethod.Statistical(num_features=3, method="maximal_info") | ||
# selector = Selective(method) | ||
# selector.fit(data, label) | ||
# subset = selector.transform(data) | ||
# | ||
# # Reduced columns | ||
# self.assertEqual(subset.shape[1], 3) | ||
# self.assertListEqual(list(subset.columns), ['CRIM', 'AGE', 'LSTAT']) | ||
# | ||
# def test_maximal_regress_top_percentile(self): | ||
# data, label = get_data_label(load_boston()) | ||
# data = data.drop(columns=["CHAS", "NOX", "RM", "DIS", "RAD", "TAX", "PTRATIO", "INDUS"]) | ||
# | ||
# method = SelectionMethod.Statistical(num_features=0.6, method="maximal_info") | ||
# selector = Selective(method) | ||
# selector.fit(data, label) | ||
# subset = selector.transform(data) | ||
# | ||
# # Reduced columns | ||
# self.assertEqual(subset.shape[1], 3) | ||
# self.assertListEqual(list(subset.columns), ['CRIM', 'AGE', 'LSTAT']) | ||
# | ||
# def test_maximal_regress_top_k_all(self): | ||
# data, label = get_data_label(load_boston()) | ||
# data = data.drop(columns=["CHAS", "NOX", "RM", "DIS", "RAD", "TAX", "PTRATIO", "INDUS"]) | ||
# | ||
# method = SelectionMethod.Statistical(num_features=5, method="maximal_info") | ||
# selector = Selective(method) | ||
# selector.fit(data, label) | ||
# subset = selector.transform(data) | ||
# | ||
# # Reduced columns | ||
# self.assertEqual(data.shape[1], subset.shape[1]) | ||
# self.assertListEqual(list(data.columns), list(subset.columns)) | ||
# | ||
# def test_maximal_regress_top_percentile_all(self): | ||
# data, label = get_data_label(load_boston()) | ||
# data = data.drop(columns=["CHAS", "NOX", "RM", "DIS", "RAD", "TAX", "PTRATIO", "INDUS"]) | ||
# | ||
# method = SelectionMethod.Statistical(num_features=1.0, method="maximal_info") | ||
# selector = Selective(method) | ||
# selector.fit(data, label) | ||
# subset = selector.transform(data) | ||
# | ||
# # Reduced columns | ||
# self.assertEqual(data.shape[1], subset.shape[1]) | ||
# self.assertListEqual(list(data.columns), list(subset.columns)) | ||
# | ||
# def test_maximal_classif_top_k(self): | ||
# data, label = get_data_label(load_iris()) | ||
# | ||
# method = SelectionMethod.Statistical(num_features=2, method="maximal_info") | ||
# selector = Selective(method) | ||
# selector.fit(data, label) | ||
# subset = selector.transform(data) | ||
# | ||
# # Reduced columns | ||
# self.assertEqual(subset.shape[1], 2) | ||
# self.assertListEqual(list(subset.columns), ['petal length (cm)', 'petal width (cm)']) | ||
# | ||
# def test_maximal_classif_top_percentile(self): | ||
# data, label = get_data_label(load_iris()) | ||
# | ||
# method = SelectionMethod.Statistical(num_features=0.5, method="maximal_info") | ||
# selector = Selective(method) | ||
# selector.fit(data, label) | ||
# subset = selector.transform(data) | ||
# | ||
# # Reduced columns | ||
# self.assertEqual(subset.shape[1], 2) | ||
# self.assertListEqual(list(subset.columns), ['petal length (cm)', 'petal width (cm)']) | ||
# | ||
# def test_maximal_classif_top_percentile_all(self): | ||
# data, label = get_data_label(load_iris()) | ||
# | ||
# method = SelectionMethod.Statistical(num_features=1.0, method="maximal_info") | ||
# selector = Selective(method) | ||
# selector.fit(data, label) | ||
# subset = selector.transform(data) | ||
# | ||
# # Reduced columns | ||
# self.assertEqual(subset.shape[1], 4) | ||
# self.assertListEqual(list(subset.columns), | ||
# ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']) | ||
# | ||
# def test_maximal_classif_top_k_all(self): | ||
# data, label = get_data_label(load_iris()) | ||
# | ||
# method = SelectionMethod.Statistical(num_features=4, method="maximal_info") | ||
# selector = Selective(method) | ||
# selector.fit(data, label) | ||
# subset = selector.transform(data) | ||
# | ||
# # Reduced columns | ||
# self.assertEqual(subset.shape[1], 4) | ||
# self.assertListEqual(list(subset.columns), | ||
# ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']) |