fix test_aggregate_with_specific_normalization_and_aggregation_methods

wetransform-os · Nov 13, 2024 · d980127 · d980127
1 parent 5344494
commit d980127
Show file tree

Hide file tree

Showing 2 changed files with 38 additions and 68 deletions.
diff --git a/mcda/models/mcda_without_robustness.py b/mcda/models/mcda_without_robustness.py
@@ -1,3 +1,4 @@
+import re
 import sys
 import copy
 import logging
@@ -109,24 +110,17 @@ def aggregate_indicators(self, normalized_indicators: pd.DataFrame, weights: lis
         Returns:
         - A DataFrame containing the aggregated scores for each alternative and normalization method.
         """
-        # Convert `method` to string if it’s an enum instance
         if isinstance(method, AggregationFunctions):
             method = method.value
 
         self.normalized_indicators = normalized_indicators
         self.weights = weights
 
-        agg = Aggregation(self.weights)
+        agg= Aggregation(weights)
 
-        # Dictionary to map aggregation methods to their corresponding score DataFrames
-        score_dfs = {
-            AggregationFunctions.WEIGHTED_SUM.value: pd.DataFrame(),
-            AggregationFunctions.GEOMETRIC.value: pd.DataFrame(),
-            AggregationFunctions.HARMONIC.value: pd.DataFrame(),
-            AggregationFunctions.MINIMUM.value: pd.DataFrame(),
-        }
+        score_list = []
 
-        def _apply_aggregation(agg_method, df_subset, suffix):
+        def _apply_aggregation(norm_method, agg_method, df_subset):
             """
             Apply the aggregation method to a subset of the DataFrame and store results in the appropriate DataFrame.
             """
@@ -137,63 +131,39 @@ def _apply_aggregation(agg_method, df_subset, suffix):
                 AggregationFunctions.MINIMUM.value: agg.minimum,
             }.get(agg_method)
 
-            if agg_function:
-                aggregated_scores = agg_function(df_subset)
-
-                if isinstance(aggregated_scores, pd.Series):
-                    aggregated_scores = aggregated_scores.to_frame()
-
-                aggregated_scores.columns = [f"{col}_{agg_method}_{suffix}" for col in
-                                             df_subset.columns.unique(level=0)]
-
-        for base_col_name in self.normalized_indicators.columns.str.split("_").str[0].unique():
-            relevant_columns = self.normalized_indicators.filter(regex=f"^{base_col_name}_")
-
-            for suffix in relevant_columns.columns.str.split("_", n=1).str[1].unique():
-                # Define the correct columns based on whether "without_zero" is in the suffix or not
-                if method is None or method == AggregationFunctions.WEIGHTED_SUM.value:
-                    if "without_zero" not in suffix:
-                        # Only select columns ending with the exact suffix that doesn't contain "without_zero"
-                        selected_columns = relevant_columns.filter(regex=f"_{suffix}$")
-                        _apply_aggregation(AggregationFunctions.WEIGHTED_SUM.value, selected_columns, suffix)
-
-                elif method in [AggregationFunctions.GEOMETRIC.value, AggregationFunctions.HARMONIC.value]:
-                    if "without_zero" in suffix:
-                        selected_columns = relevant_columns.filter(regex=f"_{suffix}$")
-                        if method == AggregationFunctions.GEOMETRIC.value:
-                            _apply_aggregation(AggregationFunctions.GEOMETRIC.value, selected_columns, suffix)
-                        elif method == AggregationFunctions.HARMONIC.value:
-                            _apply_aggregation(AggregationFunctions.HARMONIC.value, selected_columns, suffix)
-
-                elif method == AggregationFunctions.MINIMUM.value:
-                    if "without_zero" not in suffix:
-                        selected_columns = relevant_columns.filter(regex=f"_{suffix}$")
-                        _apply_aggregation(AggregationFunctions.MINIMUM.value, selected_columns, suffix)
-
-        # Loop through all columns to detect normalization methods
-        # for normalization_col_name in self.normalized_indicators.columns.str.split("_").str[1].unique():
-        #     suffix = normalized_indicators.columns.str.split("_", n=1).str[1]
-        #     relevant_columns = self.normalized_indicators.filter(regex=f"_{normalization_col_name}(_|$)")
-        #
-        #      # weighted_sum
-        #     if method is None or method == AggregationFunctions.WEIGHTED_SUM.value:
-        #         if "without_zero" not in suffix:
-        #             _apply_aggregation(AggregationFunctions.WEIGHTED_SUM.value, relevant_columns, suffix)
-        #
-        #     # geometric or harmonic
-        #     if method in [AggregationFunctions.GEOMETRIC.value,
-        #                   AggregationFunctions.HARMONIC.value] and "without_zero" in suffix:
-        #     # minimum
-        #         if method == AggregationFunctions.GEOMETRIC.value:
-        #             _apply_aggregation(AggregationFunctions.GEOMETRIC.value, relevant_columns,
-        #                                    f"_geom_{suffix}")
-        #         elif method == AggregationFunctions.HARMONIC.value:
-        #             _apply_aggregation(AggregationFunctions.HARMONIC.value, relevant_columns, f"_harm_{suffix}")
-        #     if method == AggregationFunctions.MINIMUM.value:
-        #         if "without_zero" not in suffix:
-        #             _apply_aggregation(AggregationFunctions.MINIMUM.value, selected_columns, f"_min_{suffix}")
+            aggregated_scores = agg_function(df_subset)
+
+            if isinstance(aggregated_scores, pd.Series):
+                aggregated_scores = aggregated_scores.to_frame()
+
+            aggregated_scores.columns = [f"{norm_method}_{agg_method}"]
+
+            score_list.append(aggregated_scores)
+
+        for norm_method in self.normalized_indicators.columns.str.split("_", n=1).str[1].unique():
+
+            without_zero_columns = self.normalized_indicators.filter(regex="without_zero$")
+            with_zero_columns = self.normalized_indicators[self.normalized_indicators.columns.difference(without_zero_columns.columns)]
+
+            # Apply WEIGHTED_SUM only to columns whose names do not end in "without_zero"
+            if method is None or method == AggregationFunctions.WEIGHTED_SUM.value:
+                _apply_aggregation(norm_method, AggregationFunctions.WEIGHTED_SUM.value,
+                                   with_zero_columns)
+
+            # Apply GEOMETRIC and HARMONIC only to columns whose names end in "without_zero"
+            if method == AggregationFunctions.GEOMETRIC.value:
+                _apply_aggregation(norm_method, AggregationFunctions.GEOMETRIC.value,
+                                   without_zero_columns)
+            elif method == AggregationFunctions.HARMONIC.value:
+                _apply_aggregation(norm_method, AggregationFunctions.HARMONIC.value,
+                                   without_zero_columns)
+
+            # Apply MINIMUM only to columns whose names do not end in "without_zero"
+            if method == AggregationFunctions.MINIMUM.value:
+                _apply_aggregation(norm_method, AggregationFunctions.MINIMUM.value,
+                                   with_zero_columns)
 
         # Concatenate all score DataFrames into a single DataFrame
-        scores = pd.concat([df for df in score_dfs.values() if not df.empty], axis=1)
+        scores = pd.concat(score_list, axis=1)
 
         return scores
diff --git a/tests/unit_tests/test_promcda.py b/tests/unit_tests/test_promcda.py
@@ -147,12 +147,12 @@ def test_aggregate_with_specific_normalization_and_aggregation_methods(self):
                                               weights=self.robustness['given_weights'])
 
         # When
-        expected_columns = ['ws-minmax_01']
+        expected_columns = ['minmax_01_weighted_sum', 'minmax_without_zero_weighted_sum']
 
         # Then
         self.assertCountEqual(aggregated_scores.columns, expected_columns, "Only specified methods should be applied.")
         self.assertTrue(
-            (aggregated_scores['ws-minmax_01'] >= 0).all() and (aggregated_scores['ws-minmax_01'] <= 1).all(),
+            (aggregated_scores['minmax_01_weighted_sum'] >= 0).all() and (aggregated_scores['minmax_01_weighted_sum'] <= 1).all(),
             "Values should be in the range [0, 1] for minmax normalization with weighted sum.")