Add BaselineRankMultiFeature (#871)
* changing PercentileRankOneFeature descending parameter

* update parameter name

* add deprecated descend parameter for backwards compatibility

* multi feature ranker

* multi feature ranker tests

* debug test

* fix typo

* avoid duplicating logic in tests

* deprecation warning for PercentileRankOneFeature
shaycrk authored Dec 7, 2021
1 parent 8bd22d8 commit b4ff916
Showing 6 changed files with 230 additions and 30 deletions.
8 changes: 4 additions & 4 deletions docs/sources/dirtyduck/eis.md
@@ -327,7 +327,7 @@ We will begin defining some basic models as baselines.
    'triage.component.catwalk.baselines.rankers.PercentileRankOneFeature':
      feature: ['risks_entity_id_all_risk_high_sum', 'inspections_entity_id_all_total_count', 'results_entity_id_all_result_fail_sum']
-     descend: [True]
+     low_value_high_score: [True]

    'sklearn.dummy.DummyClassifier':
      strategy: ['prior', 'stratified']
@@ -475,9 +475,9 @@ After the experiment finishes, we can create the following table:
1 | SimpleThresholder | {"rules": ["inspections\_entity\_id\_1month\_total\_count > 0"]} | {inspection,inspections,results,risks} | {1,19,37,55,73,91} | {2014-12-01,2015-06-01,2015-12-01,2016-06-01,2016-12-01,2017-06-01} | {" 0.358"," 0.231"," 0.321"," 0.267"," 0.355"," 0.239"}
2 | SimpleThresholder | {"rules": ["results\_entity\_id\_1month\_result\_fail\_sum > 0"]} | {inspection,inspections,results,risks} | {2,20,38,56,74,92} | {2014-12-01,2015-06-01,2015-12-01,2016-06-01,2016-12-01,2017-06-01} | {" 0.316"," 0.316"," 0.323"," 0.344"," 0.330"," 0.312"}
3 | SimpleThresholder | {"rules": ["risks\_entity\_id\_1month\_risk\_high\_sum > 0"]} | {inspection,inspections,results,risks} | {3,21,39,57,75,93} | {2014-12-01,2015-06-01,2015-12-01,2016-06-01,2016-12-01,2017-06-01} | {" 0.364"," 0.248"," 0.355"," 0.286"," 0.371"," 0.257"}
4 | PercentileRankOneFeature | {"descend": true, "feature": "risks\_entity\_id\_all\_risk\_high\_sum"} | {inspection,inspections,results,risks} | {4,22,40,58,76,94} | {2014-12-01,2015-06-01,2015-12-01,2016-06-01,2016-12-01,2017-06-01} | {" 0.121"," 0.193"," 0.124"," 0.230"," 0.112"," 0.161"}
5 | PercentileRankOneFeature | {"descend": true, "feature": "inspections\_entity\_id\_all\_total\_count"} | {inspection,inspections,results,risks} | {5,23,41,59,77,95} | {2014-12-01,2015-06-01,2015-12-01,2016-06-01,2016-12-01,2017-06-01} | {" 0.076"," 0.133"," 0.098"," 0.101"," 0.086"," 0.082"}
6 | PercentileRankOneFeature | {"descend": true, "feature": "results\_entity\_id\_all\_result\_fail\_sum"} | {inspection,inspections,results,risks} | {6,24,42,60,78,96} | {2014-12-01,2015-06-01,2015-12-01,2016-06-01,2016-12-01,2017-06-01} | {" 0.237"," 0.274"," 0.250"," 0.275"," 0.225"," 0.221"}
4 | PercentileRankOneFeature | {"low_value_high_score": true, "feature": "risks\_entity\_id\_all\_risk\_high\_sum"} | {inspection,inspections,results,risks} | {4,22,40,58,76,94} | {2014-12-01,2015-06-01,2015-12-01,2016-06-01,2016-12-01,2017-06-01} | {" 0.121"," 0.193"," 0.124"," 0.230"," 0.112"," 0.161"}
5 | PercentileRankOneFeature | {"low_value_high_score": true, "feature": "inspections\_entity\_id\_all\_total\_count"} | {inspection,inspections,results,risks} | {5,23,41,59,77,95} | {2014-12-01,2015-06-01,2015-12-01,2016-06-01,2016-12-01,2017-06-01} | {" 0.076"," 0.133"," 0.098"," 0.101"," 0.086"," 0.082"}
6 | PercentileRankOneFeature | {"low_value_high_score": true, "feature": "results\_entity\_id\_all\_result\_fail\_sum"} | {inspection,inspections,results,risks} | {6,24,42,60,78,96} | {2014-12-01,2015-06-01,2015-12-01,2016-06-01,2016-12-01,2017-06-01} | {" 0.237"," 0.274"," 0.250"," 0.275"," 0.225"," 0.221"}
7 | DecisionTreeClassifier | {"criterion": "gini", "max\_depth": 1, "max\_features": "sqrt", "min\_samples\_split": 2} | {inspection,inspections,results,risks} | {7,25,43,61,79,97} | {2014-12-01,2015-06-01,2015-12-01,2016-06-01,2016-12-01,2017-06-01} | {" 0.284"," 0.441"," 0.559"," 0.479"," 0.463"," 0.412"}
8 | DecisionTreeClassifier | {"criterion": "gini", "max\_depth": 2, "max\_features": "sqrt", "min\_samples\_split": 2} | {inspection,inspections,results,risks} | {8,26,44,62,80,98} | {2014-12-01,2015-06-01,2015-12-01,2016-06-01,2016-12-01,2017-06-01} | {" 0.401"," 0.388"," 0.533"," 0.594"," 0.519"," 0.649"}
9 | DecisionTreeClassifier | {"criterion": "gini", "max\_depth": 5, "max\_features": "sqrt", "min\_samples\_split": 2} | {inspection,inspections,results,risks} | {9,27,45,63,81,99} | {2014-12-01,2015-06-01,2015-12-01,2016-06-01,2016-12-01,2017-06-01} | {" 0.594"," 0.876"," 0.764"," 0.843"," 0.669"," 0.890"}
13 changes: 12 additions & 1 deletion example/config/experiment.yaml
@@ -474,7 +474,18 @@ grid_config:
    # predictive models to simply ranking entities on a single feature.
    'triage.component.catwalk.baselines.rankers.PercentileRankOneFeature':
        feature: ['feature_one', 'feature_two']
-       descend: True
+       low_value_high_score: [True]
+    # catwalk's BaselineRankMultiFeature baseline will score based on the ranking
+    # by one or more features (note that the scores don't map to percentiles as
+    # in PercentileRankOneFeature). This provides a slightly more complex baseline
+    # than above, but still realistic for what might be encountered in practice.
+    # The example below will create two ranker "models": one ranking by two features
+    # and the other just by a single feature. Note that the rules are lists of
+    # dictionaries.
+    'triage.component.catwalk.baselines.rankers.BaselineRankMultiFeature':
+        rules:
+            - [{feature: 'feature_1', low_value_high_score: True}, {feature: 'feature_2', low_value_high_score: False}]
+            - [{feature: 'feature_3', low_value_high_score: True}]
    # catwalk's SimpleThresholder baseline will evaluate each entity against
    # a list of rules and classify entities as 1 based on whether they meet
    # any or all of these rules, depending on whether 'or' or 'and' is
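To make the new grid entry concrete, here is a minimal sketch (not part of this commit) of the direct construction that the first rule set above corresponds to; `feature_1` and `feature_2` are the placeholder names from the example config:

```python
from triage.component.catwalk.baselines.rankers import BaselineRankMultiFeature

# First rule set from the grid above: rank primarily by feature_1 (low values
# score high), breaking ties by feature_2 (high values score high).
ranker = BaselineRankMultiFeature(rules=[
    {"feature": "feature_1", "low_value_high_score": True},
    {"feature": "feature_2", "low_value_high_score": False},
])
```

In the grid, each element of `rules` — itself a list of rule dictionaries — produces one such ranker, so the example above defines two baseline models.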
2 changes: 1 addition & 1 deletion example/dirtyduck/experiments/eis_01.yaml
@@ -177,7 +177,7 @@ grid_config:

    'triage.component.catwalk.baselines.rankers.PercentileRankOneFeature':
      feature: ['risks_entity_id_all_risk_high_sum', 'inspections_entity_id_all_total_count', 'results_entity_id_all_result_fail_sum']
-     descend: [True]
+     low_value_high_score: [True]

    'sklearn.dummy.DummyClassifier':
      strategy: ['prior', 'stratified']
115 changes: 99 additions & 16 deletions src/tests/catwalk_tests/test_baselines.py
@@ -6,6 +6,7 @@
from unittest import TestCase

from triage.component.catwalk.baselines.rankers import PercentileRankOneFeature
+from triage.component.catwalk.baselines.rankers import BaselineRankMultiFeature
from triage.component.catwalk.baselines.thresholders import SimpleThresholder
from triage.component.catwalk.baselines.thresholders import get_operator_method
from triage.component.catwalk.baselines.thresholders import OPERATOR_METHODS
@@ -46,6 +47,52 @@ def rules(request):
    request.cls.rules = ["x1 > 0", "x2 <= 1"]


+def scores_align_with_ranks(expected_ranks, returned_scores):
+    '''
+    Helper function to check that scores align with ranks
+    correctly for the ranking baselines (e.g., higher ranks
+    get higher scores and ties have the same score)
+    '''
+    df = pd.DataFrame({
+        'rank': expected_ranks,
+        'score': returned_scores
+    }).sort_values('rank', ascending=True)
+
+    curr_rank = None
+    curr_score = None
+
+    # Loop through the sorted records to check for any inconsistencies
+    for ix, rec in df.iterrows():
+        if curr_rank is None:
+            curr_rank = rec['rank']
+            curr_score = rec['score']
+            continue
+
+        if rec['rank'] < curr_rank:
+            raise RuntimeError('Something has gone wrong with df.sort_values!')
+        elif rec['rank'] == curr_rank and rec['score'] != curr_score:
+            return False
+        elif rec['rank'] > curr_rank and rec['score'] <= curr_score:
+            return False
+
+        curr_rank = rec['rank']
+        curr_score = rec['score']
+
+    # If we got through the loop without any issues, return True
+    return True
+
+
+def test_scores_align_with_ranks():
+    # correct, no ties
+    assert scores_align_with_ranks([1, 2, 3], [0, 0.5, 1.0])
+    # correct, with ties
+    assert scores_align_with_ranks([1, 2, 2, 3], [0, 0.5, 0.5, 1.0])
+    # incorrect, no ties
+    assert not scores_align_with_ranks([1, 2, 3], [1.0, 0.5, 0.8])
+    # incorrect, ties with different scores
+    assert not scores_align_with_ranks([1, 2, 2, 3], [0, 0.5, 0.7, 1.0])
+

@pytest.mark.usefixtures("data")
class TestRankOneFeature(TestCase):
    def test_fit(self):
@@ -62,25 +109,61 @@ def test_ranking_on_unavailable_feature_raises_error(self):
            ranker.fit(x=self.data["X_train"], y=self.data["y_train"])

    def test_predict_proba(self):
-        for descend_value in [True, False]:
-            ranker = PercentileRankOneFeature(feature="x3", descend=descend_value)
+        for direction_value in [True, False]:
+            ranker = PercentileRankOneFeature(feature="x3", low_value_high_score=direction_value)
            ranker.fit(x=self.data["X_train"], y=self.data["y_train"])
            results = ranker.predict_proba(self.data["X_test"])
-            if descend_value:
-                expected_results = np.array(
-                    [
-                        np.zeros(len(self.data["X_test"])),
-                        [0.875, 0.125, 0.375, 0, 0.625, 0.25, 0.5, 0.625],
-                    ]
-                ).transpose()
+            if direction_value:
+                expected_ranks = [6, 1, 3, 0, 5, 2, 4, 5]
            else:
-                expected_results = np.array(
-                    [
-                        np.zeros(len(self.data["X_test"])),
-                        [0, 0.75, 0.5, 0.875, 0.125, 0.625, 0.375, 0.125],
-                    ]
-                ).transpose()
-            np.testing.assert_array_equal(results, expected_results)
+                expected_ranks = [0, 5, 3, 6, 1, 4, 2, 1]
+
+            assert scores_align_with_ranks(expected_ranks, results[:, 1])


@pytest.mark.usefixtures("data")
class TestRankMultiFeature(TestCase):
def test_fit(self):
rules = {'feature': 'x3', 'low_value_high_score': False}
ranker = BaselineRankMultiFeature(rules=rules)
assert ranker.feature_importances_ is None
ranker.fit(x=self.data["X_train"], y=self.data["y_train"])
np.testing.assert_array_equal(
ranker.feature_importances_, np.array([0, 0, 1, 0])
)

def test_ranking_on_unavailable_feature_raises_error(self):
rules = [{'feature': 'x5', 'low_value_high_score': False}]
ranker = BaselineRankMultiFeature(rules=rules)
with self.assertRaises(BaselineFeatureNotInMatrix):
ranker.fit(x=self.data["X_train"], y=self.data["y_train"])

def test_predict_proba_one_feature(self):
for direction_value in [True, False]:
rules = {'feature': 'x3', 'low_value_high_score': direction_value}
ranker = BaselineRankMultiFeature(rules=rules)
ranker.fit(x=self.data["X_train"], y=self.data["y_train"])
results = ranker.predict_proba(self.data["X_test"])
if direction_value:
expected_ranks = [6, 1, 3, 0, 5, 2, 4, 5]
else:
expected_ranks = [0, 5, 3, 6, 1, 4, 2, 1]

assert scores_align_with_ranks(expected_ranks, results[:,1])

def test_predict_proba_multi_feature(self):
rules = [
{'feature': 'x3', 'low_value_high_score': True},
{'feature': 'x2', 'low_value_high_score': False}
]

ranker = BaselineRankMultiFeature(rules=rules)
ranker.fit(x=self.data["X_train"], y=self.data["y_train"])
results = ranker.predict_proba(self.data["X_test"])

expected_ranks = [7, 1, 3, 0, 5, 2, 4, 6]

assert scores_align_with_ranks(expected_ranks, results[:,1])


@pytest.mark.parametrize('operator', OPERATOR_METHODS.keys())
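For a standalone illustration of the scoring behavior these tests check, a sketch with a made-up matrix (not the test fixture used above):

```python
import pandas as pd

from triage.component.catwalk.baselines.rankers import BaselineRankMultiFeature

# Made-up matrix: five rows of a single feature, with ties.
X = pd.DataFrame({"f1": [0, 0, 1, 2, 2]})

ranker = BaselineRankMultiFeature(rules={"feature": "f1", "low_value_high_score": False})
ranker.fit(x=X, y=None)  # y is accepted for API compatibility but unused

# Dense ranks over the sorted rows are [0, 0, 1, 2, 2]; dividing by the
# maximum rank (2) gives positive-class scores [0.0, 0.0, 0.5, 1.0, 1.0].
print(ranker.predict_proba(X)[:, 1])
```

Ties share a score and scores always span 0 to 1, but unlike PercentileRankOneFeature they are not percentiles.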
2 changes: 1 addition & 1 deletion src/tests/conftest.py
@@ -259,7 +259,7 @@ def sample_grid_config():
        },
        'triage.component.catwalk.baselines.rankers.PercentileRankOneFeature': {
            'feature': ['feature_one', 'feature_two'],
-            'descend': [True]
+            'low_value_high_score': [True]
        },
        'triage.component.catwalk.baselines.thresholders.SimpleThresholder': {
            'rules': [['feature_one > 3', 'feature_two <= 5']],
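If the sample grid were extended to cover the new baseline, the entry might look like the following sketch (illustrative only; this commit does not add one):

```python
sample_grid_config = {
    'triage.component.catwalk.baselines.rankers.BaselineRankMultiFeature': {
        'rules': [
            [{'feature': 'feature_one', 'low_value_high_score': True}],
        ],
    },
}
```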
120 changes: 113 additions & 7 deletions src/triage/component/catwalk/baselines/rankers.py
@@ -1,13 +1,39 @@
import verboselogs, logging
logger = verboselogs.VerboseLogger(__name__)
from scipy import stats
import numpy as np
import pandas as pd
from triage.component.catwalk.exceptions import BaselineFeatureNotInMatrix

+REQUIRED_KEYS = frozenset(["feature", "low_value_high_score"])


class PercentileRankOneFeature:
-    def __init__(self, feature, descend=False):
+    def __init__(self, feature, low_value_high_score=None, descend=None):
logger.warning("DEPRECATION WARNING: PercentileRankOneFeature is being replaced by "
"BaselineRankMultiFeature. Note, however, that the scores returned by the new "
"ranker cannot be interpreted as percentiles."
)
if descend is not None:
# If the deprecated `descend` parameter has been specified, raise a
# warning, then use this value for low_value_high_score, which has
# the same behavior
logger.warning("DEPRECATION WARNING: parameter `descend` is deprecated for "
"PercentileRankOneFeature. Use `low_value_high_score` instead."
)
if low_value_high_score is not None:
raise ValueError("Only one of `descend` or `low_value_high_score` can be "
"specified for PercentileRankOneFeature."
)
low_value_high_score = descend

# set default this way so we can check if both have been specified above
if low_value_high_score is None:
low_value_high_score = False

        self.feature = feature  # which feature to rank on
-        self.descend = (
-            descend
+        self.low_value_high_score = (
+            low_value_high_score
        )  # should feature be ranked so lower values -> higher scores
        self.feature_importances_ = None

@@ -48,7 +74,7 @@ def predict_proba(self, x):
        # values of the feature. so if the entities have values [0, 0, 1, 2, 2],
        # the first two entities will have the lowest ranks (and therefore the
        # lowest risk scores) and the last two will have the highest ranks (and
-        # highest risk scores). for the descending method, we need to reverse
+        # highest risk scores). for the "low_value_high_score" method, we need to reverse
        # this, and for both sorting directions, we need to convert the ranks to
        # percentiles.

@@ -60,7 +86,7 @@
method = "min"
subtract = 1

-        # when descending: tied entities should get the *highest* rank, so for
+        # when `low_value_high_score=True`: tied entities should get the *highest* rank, so for
        # [0, 0, 1, 2, 2] the ranks should be [2, 2, 3, 5, 5]. if we reverse
        # these ranks by subtracting all items from the maximum rank (5), we
        # end up with the correct ranks for calculating percentiles:
@@ -70,15 +96,95 @@
        # ([5, 5, 5, 5, 5] - [2, 2, 3, 5, 5]) / 5 = [0.6, 0.6, 0.4, 0, 0]
        # and
        # [1, 1, 1, 1, 1] - ([2, 2, 3, 5, 5] / 5) = [0.6, 0.6, 0.4, 0, 0]
-        if self.descend:
+        if self.low_value_high_score:
            method = "max"
            subtract = 0

        # get the ranks and convert to percentiles
        ranks = stats.rankdata(x, method)
        ranks = [(rank - subtract) / len(x) for rank in ranks]
-        if self.descend:
+        if self.low_value_high_score:
            ranks = [1 - rank for rank in ranks]

        # format it like sklearn output and return
        return np.array([np.zeros(len(x)), ranks]).transpose()


+class BaselineRankMultiFeature:
+    def __init__(self, rules):
+        if not isinstance(rules, list):
+            rules = [rules]
+
+        # validate rules: must have feature and sort order
+        for rule in rules:
+            if not isinstance(rule, dict):
+                raise ValueError('Rules for BaselineRankMultiFeature must be of type dict')
+            if not rule.keys() >= REQUIRED_KEYS:
+                raise ValueError(f'BaselineRankMultiFeature rule "{rule}" missing one or more required keys ({REQUIRED_KEYS})')
+
+        self.rules = rules
+        self.feature_importances_ = None
+
+    @property
+    def all_feature_names(self):
+        return [rule["feature"] for rule in self.rules]
+
+    @property
+    def all_sort_directions(self):
+        # note that ascending=True sort will mean low values get low scores,
+        # so negate the parameter direction to get the right relationship
+        return [not rule['low_value_high_score'] for rule in self.rules]
+
+    def _set_feature_importances_(self, x):
""" Assigns feature importances following the rule: 1 for the features
we are thresholding on, 0 for all other features.
"""
+        feature_importances = [0] * len(x.columns)
+        for feature_name in self.all_feature_names:
+            try:
+                position = x.columns.get_loc(feature_name)
+            except KeyError:
+                raise BaselineFeatureNotInMatrix(
+                    (
+                        "Rules refer to a feature ({feature_name}) not included in "
+                        "the training matrix!".format(feature_name=feature_name)
+                    )
+                )
+            feature_importances[position] = 1
+        self.feature_importances_ = np.array(feature_importances)
+
+    def fit(self, x, y):
+        """ Set feature importances and return self.
+        """
+        self._set_feature_importances_(x)
+        return self
+
+    def predict_proba(self, x):
+        """ Generate the rank scores and return these.
+        """
+        # reduce x to the selected set of features
+        x = x[self.all_feature_names].reset_index(drop=True)
+
+        x = x.sort_values(self.all_feature_names, ascending=self.all_sort_directions)
+
+        # initialize curr_rank to -1 so the first record will have rank 0 (hence "score"
+        # will range from 0 to 1)
+        ranks = []
+        curr_rank = -1
+        prev = []
+
+        # calculate ranks over sorted records, giving ties the same rank
+        for rec in x.values:
+            if not np.array_equal(prev, rec):
+                curr_rank += 1
+            ranks.append(curr_rank)
+            prev = rec
+
+        # normalize to 0 to 1 range
+        x['score'] = [r / max(ranks) for r in ranks]
+
+        # reset back to original sort order, calculate "score" for "0 class"
+        scores_1 = x.sort_index()['score'].values
+        scores_0 = np.array([1 - s for s in scores_1])
+
+        return np.array([scores_0, scores_1]).transpose()
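The percentile arithmetic worked through in the comments above can be checked directly; a short sketch assuming only numpy and scipy:

```python
import numpy as np
from scipy import stats

x = np.array([0, 0, 1, 2, 2])

# default direction: tied entities get the *lowest* rank ("min");
# subtract 1 so percentiles start at 0
ranks = stats.rankdata(x, "min")   # [1, 1, 3, 4, 4]
print((ranks - 1) / len(x))        # [0.  0.  0.4 0.6 0.6]

# low_value_high_score=True: tied entities get the *highest* rank ("max"),
# then reflect so low feature values map to high scores
ranks = stats.rankdata(x, "max")   # [2, 2, 3, 5, 5]
print(1 - ranks / len(x))          # [0.6 0.6 0.4 0.  0. ]
```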
