Added mask for poisson and uniform distribution, rename mean and std …

…with parameter1 and parameter2 where needed, finalised README file
wetransform-os · Nov 15, 2023 · c532f93 · c532f93
1 parent 8950f28
commit c532f93
Show file tree

Hide file tree

Showing 4 changed files with 52 additions and 27 deletions.
diff --git a/README.md b/README.md
@@ -4,12 +4,12 @@
 
 <!-- [![status](https://joss.theoj.org/papers/4214c6e588774490458e34630e8052c1/status.svg)](https://joss.theoj.org/papers/4214c6e588774490458e34630e8052c1) -->
 <!-- [![PyPi version](https://img.shields.io/pypi/v/promcda?color=blue)](https://pypi.org/project/promcda) -->
-[![pytest](https://github.com/wetransform-os/ProMCDA/actions/workflows/python-app-test.yml/badge.svg)](https://github.com/wetransform-os/ProMCDA/actions/workflows/python-app-test.yml)
+[![pytest](https://github.com/wetransform-os/ProMCDA/actions/workflows/python-app.yml/badge.svg)](https://github.com/wetransform-os/ProMCDA/actions/workflows/python-app.yml)
 ![License](https://img.shields.io/badge/license-EPL%202.0-blue)
 
 # Probabilistic Multi Criteria Decision Analysis
 
-A tool to estimate scores of alternatives and their uncertainties based on the Multi Criteria Decision Analysis (MCDA) approach.
+A tool to estimate scores of alternatives and their uncertainties based on a Multi Criteria Decision Analysis (MCDA) approach.
 
 ### MCDA quick overview and applications
 
@@ -47,7 +47,7 @@ These are just a few examples of how MCDA can be applied across a wide range of
 that involve multiple, often conflicting, criteria. The specific application of MCDA will depend on the context 
 and the goals of the decision-maker.
 
-In MCDA context an *alternative* is one possible course of action available; 
+In an MCDA context, an *alternative* is one possible course of action available; 
 an *indicator* is a parameter that describes the alternatives.
 The variability of the MCDA scores is caused by:
 
@@ -146,13 +146,26 @@ effect.
 
 
 ### Requirements
+On Windows:
+```bash
+conda create --name <choose-a-name-like-Promcda> python=3.6
+activate.bat <choose-a-name-like-Promcda>
+pip install -r requirements.txt
+```
+On Mac and Linux:
 ```bash
 conda create --name <choose-a-name-like-Promcda> python=3.6
 source activate <choose-a-name-like-Promcda>
 pip install -r requirements.txt
 ```
 
 ### Running the code (from root dir)
+On Windows:
+```bash
+activate.bat <your-env>
+python3 -m mcda.mcda_run -c configuration.json
+```
+On Mac and Linux:
 ```bash
 source activate <your-env>
 python3 -m mcda.mcda_run -c configuration.json
@@ -178,14 +191,14 @@ If the weights are randomly sampled (robustness analysis of the weights), then:
 - if all weights are sampled together, MCDA calculations receive N-inputs (N being the number of `monte_carlo_runs`); 
   if the weights are sampled one at a time, MCDA will receive (*n-inputs x num_weights*) inputs;
 - iterations 1,2,3 of the first condition follow;
-- the results of all the combinations normalization/aggregation (or the one selected) are provided in the form of mean and std over all the runs 
+- the results of all the combinations normalization/aggregation (or the one selected) are provided in the form of mean and standard deviation over all the runs 
   (if the weights are iteratively sampled, this applies for *num_indicators-times*).
 
 If the robustness analysis regards the indicators, then:
-- for each indicator, the mean and std are extracted from the input matrix;
+- for each indicator, the parameters (e.g., mean and standard deviation) describing the marginal distribution of interest are extracted from the input matrix;
 - for each N, and for each indicator, a value is sampled from the corresponding assigned marginal distribution: therefore, one of the N input matrices is created;
 - normalizations and aggregations are performed as in points 1,2 of the first case: a list of all the results is created in the output directory;
-- mean and std of all the results are estimated across (monte_carlo_runs x pairs of combinations);  
+- mean and standard deviation of all the results are estimated across (monte_carlo_runs x pairs of combinations);  
 - in this case, no randomness on the weights is allowed.
 
 

diff --git a/mcda/mcda_run.py b/mcda/mcda_run.py
@@ -312,7 +312,7 @@ def main(input_config: dict):
             logger.info("Start ProMCDA with uncertainty on the indicators")
             is_average_larger_than_std = check_averages_larger_std(input_matrix_no_alternatives, config)
             if is_average_larger_than_std is False:
-                logger.info('Some std values of some indicators are larger than their averages.')
+                logger.info('Some standard deviation values of some indicators are larger than their averages.')
                 logger.info('Maybe you need to investigate the nature of your data.')
                 logger.info('If you continue, the negative values will be rescaled internally to a positive range.')
                 while True:

diff --git a/mcda/mcda_with_robustness.py b/mcda/mcda_with_robustness.py
@@ -59,7 +59,7 @@ def convert_list(data_list):
 
     def create_n_randomly_sampled_matrices(self) -> List[pd.DataFrame]:
         """
-        This function receives an input matrix of dimensions (Ax2I) whose columns represent means and standard deviations
+        This function receives an input matrix of dimensions (Ax2I) whose columns represent parameter 1 and parameter 2
         of each indicator. In a first step, it produces a list of length I of matrices of dimension (AxN).
         Every matrix represents the N random samples of every alternative (A), per indicator (I).
         If there are negative random samples, they are rescaled into [0-1].
@@ -73,6 +73,7 @@ def create_n_randomly_sampled_matrices(self) -> List[pd.DataFrame]:
         """
         marginal_pdf = self._config.monte_carlo_sampling["marginal_distribution_for_each_indicator"]
         is_exact_pdf_mask = check_if_pdf_is_exact(marginal_pdf)
+        is_poisson_pdf_mask = check_if_pdf_is_poisson(marginal_pdf)
 
         num_runs = self._config.monte_carlo_sampling["monte_carlo_runs"] # N
         input_matrix = self._input_matrix # (AxI)
@@ -83,32 +84,32 @@ def create_n_randomly_sampled_matrices(self) -> List[pd.DataFrame]:
 
         j=0
         for i, pdf_type in enumerate(is_exact_pdf_mask):
-            mean_col_position = j
-            if pdf_type == 0 and marginal_pdf[i] != 'poisson':  # non-exact PDF except Poisson
-                std_col_position = mean_col_position + 1  # standard deviation column follows mean
-                mean_col = input_matrix.columns[mean_col_position]
-                std_col = input_matrix.columns[std_col_position]
-                means = input_matrix[mean_col]
-                stds = input_matrix[std_col]
+            parameter1_col_position = j
+            if pdf_type == 0 and not is_poisson_pdf_mask[i]:  # non-exact PDF except Poisson
+                parameter2_col_position = parameter1_col_position + 1  # parameter 2 column follows parameter 1
+                parameter1_col = input_matrix.columns[parameter1_col_position]
+                parameter2_col = input_matrix.columns[parameter2_col_position]
+                parameter1 = input_matrix[parameter1_col]
+                parameter2 = input_matrix[parameter2_col]
                 j += 2
 
-            elif pdf_type == 1 or marginal_pdf[i] == 'poisson':  # exact PDF or Poisson
-                mean_col = input_matrix.columns[mean_col_position]
-                means = input_matrix[mean_col]
+            elif pdf_type == 1 or is_poisson_pdf_mask[i]:  # exact PDF or Poisson
+                parameter1_col = input_matrix.columns[parameter1_col_position]
+                parameter1 = input_matrix[parameter1_col]
                 j += 1
 
             distribution_type = marginal_pdf[i // 2]
 
             if distribution_type == 'exact':
-                samples = self.repeat_series_to_create_df(means, num_runs).T
+                samples = self.repeat_series_to_create_df(parameter1, num_runs).T
             elif distribution_type == 'normal':
-                samples = np.random.normal(loc=means, scale=stds, size=(num_runs, len(means)))
+                samples = np.random.normal(loc=parameter1, scale=parameter2, size=(num_runs, len(parameter1)))
             elif distribution_type == 'uniform':
-                samples = np.random.uniform(low=means, high=stds, size=(num_runs, len(means)))
+                samples = np.random.uniform(low=parameter1, high=parameter2, size=(num_runs, len(parameter1)))
             elif distribution_type == 'lnorm':
-                samples = np.random.lognormal(mean=means, sigma=stds, size=(num_runs, len(means)))
+                samples = np.random.lognormal(mean=parameter1, sigma=parameter2, size=(num_runs, len(parameter1)))
             elif distribution_type == 'poisson':
-                samples = np.random.poisson(lam=means, size=(num_runs, len(means)))
+                samples = np.random.poisson(lam=parameter1, size=(num_runs, len(parameter1)))
             else:
                 raise ValueError(f"Invalid marginal distribution type: {distribution_type}")
 

diff --git a/mcda/utils.py b/mcda/utils.py
@@ -155,11 +155,13 @@ def check_averages_larger_std(input_matrix: pd.DataFrame, config: dict) -> bool:
 
     marginal_pdf = config.monte_carlo_sampling["marginal_distribution_for_each_indicator"]
     is_exact_pdf_mask = check_if_pdf_is_exact(marginal_pdf)
+    is_poisson_pdf_mask = check_if_pdf_is_poisson(marginal_pdf)
+    is_uniform_pdf_mask = check_if_pdf_is_uniform(marginal_pdf)
 
     j = 0
     for i, pdf_type in enumerate(is_exact_pdf_mask):
         mean_col_position = j
-        if pdf_type == 0 and marginal_pdf[i] != 'uniform' and marginal_pdf[i] != 'poisson':  # non-exact PDF except for Uniform and Poisson
+        if pdf_type == 0 and not is_uniform_pdf_mask[i] and not is_poisson_pdf_mask[i]:  # non-exact PDF except for Uniform and Poisson distributions
             std_col_position = mean_col_position + 1  # standard deviation column follows mean
             mean_col = input_matrix.columns[mean_col_position]
             std_col = input_matrix.columns[std_col_position]
@@ -169,19 +171,28 @@ def check_averages_larger_std(input_matrix: pd.DataFrame, config: dict) -> bool:
 
             satisfies_condition = all(x >= y for x, y in zip(means, stds))
 
-        elif marginal_pdf[i] == 'uniform': # Uniform distribution
+        elif is_uniform_pdf_mask[i]: # Uniform distribution
             j += 2
-        elif pdf_type == 1 or marginal_pdf[i] == 'poisson':  # exact PDF or Poisson
+
+        elif pdf_type == 1 or is_poisson_pdf_mask[i]:  # exact PDF or Poisson distribution
             j += 1
 
     return satisfies_condition
 
-
 def check_if_pdf_is_exact(marginal_pdf: list) -> list:
     exact_pdf_mask = [1 if pdf == 'exact' else 0 for pdf in marginal_pdf]
 
     return exact_pdf_mask
 
+def check_if_pdf_is_poisson(marginal_pdf: list) -> list:
+    poisson_pdf_mask = [1 if pdf == 'poisson' else 0 for pdf in marginal_pdf]
+
+    return poisson_pdf_mask
+
+def check_if_pdf_is_uniform(marginal_pdf: list) -> list:
+    uniform_pdf_mask = [1 if pdf == 'uniform' else 0 for pdf in marginal_pdf]
+
+    return uniform_pdf_mask
 
 def plot_norm_scores_without_uncert(scores: pd.DataFrame) -> object:
     num_of_combinations = scores.shape[1] - 1