Skip to content

Commit 14c9f42

Browse files
authored
21483: Adjusts settings for matrix normalizations, MAJOR (#281)
1 parent 9866504 commit 14c9f42

File tree

5 files changed

+28
-102
lines changed

5 files changed

+28
-102
lines changed

howso/client/typing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ class Evaluation(TypedDict):
5252
NewCaseThreshold: TypeAlias = Literal["max", "min", "most_similar"]
5353
"""Valid values for ``new_case_threshold`` parameters."""
5454

55-
NormalizeMethod: TypeAlias = Literal["feature_count", "fractional", "relative"]
55+
NormalizeMethod: TypeAlias = Literal["fractional_absolute", "fractional", "relative"]
5656
"""Valid values for ``normalize_method`` parameters."""
5757

5858
PathLike: TypeAlias = Union[str, os.PathLike]

howso/engine/tests/test_engine.py

-4
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,6 @@ def test_get_contribution_matrix(self, trainee):
295295
"""Test `get_contribution_matrix`."""
296296
matrix = trainee.get_contribution_matrix(
297297
normalize=True,
298-
absolute=True,
299298
fill_diagonal=True
300299
)
301300
assert len(matrix) == 5
@@ -311,7 +310,6 @@ def test_get_contribution_matrix(self, trainee):
311310
saved_matrix = matrix_processing(
312311
saved_matrix['contribution'],
313312
normalize=True,
314-
absolute=True,
315313
fill_diagonal=True
316314
)
317315

@@ -320,7 +318,6 @@ def test_get_contribution_matrix(self, trainee):
320318
def test_get_mda_matrix(self, trainee):
321319
"""Test `get_mda_matrix`."""
322320
matrix = trainee.get_mda_matrix(
323-
normalize=True,
324321
absolute=True,
325322
fill_diagonal=True
326323
)
@@ -336,7 +333,6 @@ def test_get_mda_matrix(self, trainee):
336333

337334
saved_matrix = matrix_processing(
338335
saved_matrix['mda'],
339-
normalize=True,
340336
absolute=True,
341337
fill_diagonal=True
342338
)

howso/engine/trainee.py

+7-28
Original file line numberDiff line numberDiff line change
@@ -3585,11 +3585,7 @@ def get_contribution_matrix(
35853585
directional: bool = False,
35863586
robust: bool = True,
35873587
targeted: bool = False,
3588-
normalize: bool = False,
3589-
normalize_method: NormalizeMethod | Callable | Iterable[
3590-
NormalizeMethod | Callable
3591-
] = "relative",
3592-
absolute: bool = False,
3588+
normalize: bool = True,
35933589
fill_diagonal: bool = True,
35943590
fill_diagonal_value: float | int = 1,
35953591
) -> DataFrame:
@@ -3609,25 +3605,8 @@ def get_contribution_matrix(
36093605
targeted : bool, default False
36103606
Whether to do a targeted re-analyze before each feature's contribution is calculated.
36113607
normalize : bool, default True
3612-
Whether to normalize the matrix row wise. Normalization method is set by the ``normalize_method``
3613-
parameter.
3614-
normalize_method : str or callable or iterable of str or callable, default "relative"
3615-
The normalization method. The method may either one of the strings below that correspond to a
3616-
default method or a custom callable.
3617-
3618-
These methods may be passed in as an individual string or in a iterable where they will
3619-
be processed sequentially.
3620-
3621-
Default Methods:
3622-
- 'relative': normalizes each row by dividing each value by the maximum absolute value in the row.
3623-
- 'fractional': normalizes each row by dividing each value by the sum of absolute values in the row.
3624-
- 'feature_count': normalizes each row by dividing by the feature count.
3625-
3626-
Custom Callable:
3627-
- If a custom Callable is provided, then it will be passed onto the DataFrame apply function:
3628-
``matrix.apply(Callable)``
3629-
absolute : bool, default False
3630-
Whether to transform the matrix values into the absolute values.
3608+
Whether to normalize the matrix row wise. If True, normalizes each row by dividing each value
3609+
by the sum of the values in the row, so the fractional values sum to 1.
36313610
fill_diagonal : bool, default True
36323611
Whether to fill in the diagonals of the matrix. If set to true,
36333612
the diagonal values will be filled in based on the ``fill_diagonal_value`` value.
@@ -3675,8 +3654,7 @@ def get_contribution_matrix(
36753654
matrix = matrix_processing(
36763655
matrix,
36773656
normalize=normalize,
3678-
normalize_method=normalize_method,
3679-
absolute=absolute,
3657+
normalize_method="fractional",
36803658
fill_diagonal=fill_diagonal,
36813659
fill_diagonal_value=fill_diagonal_value
36823660
)
@@ -3721,8 +3699,9 @@ def get_mda_matrix(
37213699
37223700
Default Methods:
37233701
- 'relative': normalizes each row by dividing each value by the maximum absolute value in the row.
3724-
- 'fractional': normalizes each row by dividing each value by the sum of absolute values in the row.
3725-
- 'feature_count': normalizes each row by dividing by the feature count.
3702+
- 'fractional': normalizes each row by dividing each value by the sum of the values in the row, so the normalized
3703+
values sum to 1.
3704+
- 'fractional_absolute': normalizes each row by dividing each value by the sum of absolute values in the row.
37263705
37273706
Custom Callable:
37283707
- If a custom Callable is provided, then it will be passed onto the DataFrame apply function:

howso/utilities/tests/test_utilities.py

+13-61
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,7 @@ def test_matrix_processing(
352352
processed_matrix = matrix_processing(
353353
matrix=df,
354354
normalize=normalize,
355+
normalize_method='relative',
355356
ignore_diagonals_normalize=ignore_diagonals_normalize,
356357
absolute=absolute,
357358
fill_diagonal=fill_diagonal,
@@ -364,6 +365,7 @@ def test_matrix_processing(
364365
processed_matrix = matrix_processing(
365366
matrix=df,
366367
normalize=normalize,
368+
normalize_method='relative',
367369
ignore_diagonals_normalize=ignore_diagonals_normalize,
368370
absolute=absolute,
369371
fill_diagonal=fill_diagonal,
@@ -384,6 +386,7 @@ def test_matrix_processing(
384386
processed_matrix = matrix_processing(
385387
matrix=df,
386388
normalize=normalize,
389+
normalize_method='relative',
387390
ignore_diagonals_normalize=ignore_diagonals_normalize,
388391
absolute=absolute,
389392
fill_diagonal=fill_diagonal,
@@ -403,6 +406,7 @@ def test_matrix_processing(
403406
processed_matrix = matrix_processing(
404407
matrix=df,
405408
normalize=normalize,
409+
normalize_method='relative',
406410
ignore_diagonals_normalize=ignore_diagonals_normalize,
407411
absolute=absolute,
408412
fill_diagonal=fill_diagonal,
@@ -421,6 +425,7 @@ def test_matrix_processing(
421425
processed_matrix = matrix_processing(
422426
matrix=df,
423427
normalize=normalize,
428+
normalize_method='relative',
424429
ignore_diagonals_normalize=ignore_diagonals_normalize,
425430
absolute=absolute,
426431
fill_diagonal=fill_diagonal,
@@ -439,6 +444,7 @@ def test_matrix_processing(
439444
processed_matrix = matrix_processing(
440445
matrix=df,
441446
normalize=normalize,
447+
normalize_method='relative',
442448
ignore_diagonals_normalize=ignore_diagonals_normalize,
443449
absolute=absolute,
444450
fill_diagonal=fill_diagonal,
@@ -456,9 +462,8 @@ def test_matrix_processing(
456462
@pytest.mark.parametrize(
457463
'normalize_method',
458464
(
459-
('sum'),
460-
('absolute_sum'),
461-
('feature_count'),
465+
('fractional'),
466+
('fractional_absolute'),
462467
)
463468
)
464469
def test_matrix_processing_normalization_single_method(
@@ -471,7 +476,7 @@ def test_matrix_processing_normalization_single_method(
471476
'c': [0.5, -1.5, 3.0],
472477
}, index=['a', 'b', 'c']).T
473478

474-
if normalize_method == 'sum':
479+
if normalize_method == 'fractional':
475480
processed_matrix = round(
476481
matrix_processing(
477482
matrix=df,
@@ -487,7 +492,7 @@ def test_matrix_processing_normalization_single_method(
487492

488493
assert_frame_equal(processed_matrix, correct_matrix)
489494

490-
if normalize_method == 'absolute_sum':
495+
if normalize_method == 'fractional_absolute':
491496
processed_matrix = round(
492497
matrix_processing(
493498
matrix=df,
@@ -503,59 +508,6 @@ def test_matrix_processing_normalization_single_method(
503508

504509
assert_frame_equal(processed_matrix, correct_matrix)
505510

506-
if normalize_method == 'feature_count':
507-
processed_matrix = round(
508-
matrix_processing(
509-
matrix=df,
510-
normalize=True,
511-
ignore_diagonals_normalize=False,
512-
normalize_method=normalize_method
513-
), 2)
514-
correct_matrix = pd.DataFrame({
515-
'a': [0.33, -1.00, 2.00],
516-
'b': [0.17, -0.17, 0.33],
517-
'c': [0.17, -0.50, 1.00]
518-
}, index=['a', 'b', 'c']).T
519-
520-
assert_frame_equal(processed_matrix, correct_matrix)
521-
522-
523-
def test_matrix_processing_normalization_list(
524-
normalize_method=['feature_count', 'sum'],
525-
):
526-
"""Tests that `matrix_processing` normalization parameters with lists works properly."""
527-
df = pd.DataFrame({
528-
'a': [1.0, -3.0, 6.0],
529-
'b': [0.5, -0.5, 1.0],
530-
'c': [0.5, -1.5, 3.0],
531-
}, index=['a', 'b', 'c']).T
532-
533-
processed_matrix = round(
534-
matrix_processing(
535-
matrix=df,
536-
normalize=True,
537-
ignore_diagonals_normalize=False,
538-
normalize_method=normalize_method
539-
), 2)
540-
541-
# When a list is the parameter, the methods inside are calculated sequentialy. It should be the
542-
# same as doing the two methods sequentially independently as well.
543-
correct_matrix = matrix_processing(
544-
matrix=df,
545-
normalize=True,
546-
ignore_diagonals_normalize=False,
547-
normalize_method=normalize_method[0]
548-
)
549-
correct_matrix = round(
550-
matrix_processing(
551-
matrix=correct_matrix,
552-
normalize=True,
553-
ignore_diagonals_normalize=False,
554-
normalize_method=normalize_method[1]
555-
), 2)
556-
557-
assert_frame_equal(processed_matrix, correct_matrix)
558-
559511

560512
def test_matrix_processing_normalization_callable():
561513
"""Tests that `matrix_processing` normalization parameters with Callable works properly."""
@@ -565,7 +517,7 @@ def test_matrix_processing_normalization_callable():
565517
'c': [0.5, -1.5, 3.0],
566518
}, index=['a', 'b', 'c']).T
567519

568-
# This is the exact same function as the 'absolute_sum' normalization method,
520+
# This is the exact same function as the 'fractional_absolute' normalization method,
569521
# thus it should return the same results.
570522
def divide_by_sum_abs(row):
571523
sum_abs = row.abs().sum()
@@ -584,7 +536,7 @@ def divide_by_sum_abs(row):
584536
matrix=df,
585537
normalize=True,
586538
ignore_diagonals_normalize=False,
587-
normalize_method='absolute_sum'
539+
normalize_method='fractional_absolute'
588540
), 2)
589541

590542
assert_frame_equal(processed_matrix, correct_matrix)
@@ -605,7 +557,7 @@ def test_matrix_processing_normalization_zero_division():
605557
matrix=df,
606558
normalize=True,
607559
ignore_diagonals_normalize=False,
608-
normalize_method='sum'
560+
normalize_method='fractional'
609561
), 2)
610562

611563
# First row is returned unnormalized

howso/utilities/utilities.py

+7-8
Original file line numberDiff line numberDiff line change
@@ -1176,7 +1176,7 @@ def deep_update(base, updates):
11761176
def matrix_processing( # noqa
11771177
matrix: pd.DataFrame,
11781178
normalize: bool = False,
1179-
normalize_method: Iterable[NormalizeMethod | Callable] | NormalizeMethod | Callable = "relative",
1179+
normalize_method: Iterable[NormalizeMethod | Callable] | NormalizeMethod | Callable = "fractional",
11801180
ignore_diagonals_normalize: bool = True,
11811181
absolute: bool = False,
11821182
fill_diagonal: bool = False,
@@ -1203,8 +1203,9 @@ def matrix_processing( # noqa
12031203
12041204
Default Methods:
12051205
- 'relative': normalizes each row by dividing each value by the maximum absolute value in the row.
1206-
- 'fractional': normalizes each row by dividing each value by the sum of absolute values in the row.
1207-
- 'feature_count': normalizes each row by dividing by the feature count.
1206+
- 'fractional': normalizes each row by dividing each value by the sum of the values in the row, so the normalized
1207+
values sum to 1.
1208+
- 'fractional_absolute': normalizes each row by dividing each value by the sum of absolute values in the row.
12081209
12091210
Custom Callable:
12101211
- If a custom Callable is provided, then it will be passed onto the DataFrame apply function:
@@ -1286,18 +1287,16 @@ def divide_by_max_abs(row):
12861287
for method in normalize_method:
12871288
if method == "relative":
12881289
matrix = matrix.apply(divide_by_max_abs, axis=1) # type: ignore
1289-
elif method == "sum":
1290+
elif method == "fractional":
12901291
matrix = matrix.apply(sum_if_not_zero, axis=1) # type: ignore
1291-
elif method == "absolute_sum":
1292+
elif method == "fractional_absolute":
12921293
matrix = matrix.apply(abs_sum_if_not_zero, axis=1) # type: ignore
1293-
elif method == "feature_count":
1294-
matrix = matrix.apply(lambda x: x / len(matrix.columns), axis=1) # type: ignore
12951294
elif callable(method):
12961295
matrix = matrix.apply(method, axis=1) # type: ignore
12971296
else:
12981297
raise ValueError(
12991298
f"Invalid normalization method: {normalize_method}. "
1300-
"Must be 'relative', 'sum', 'absolute_sum', 'feature_count' or a Callable."
1299+
"Must be 'relative', 'fractional', 'fractional_absolute', or a Callable."
13011300
)
13021301
if ignore_diagonals_normalize:
13031302
for i, value in enumerate(diagonal_values):

0 commit comments

Comments
 (0)