diff --git a/eis_toolkit/transformations/coda/alr.py b/eis_toolkit/transformations/coda/alr.py index ab880c8c..d9b5af9b 100644 --- a/eis_toolkit/transformations/coda/alr.py +++ b/eis_toolkit/transformations/coda/alr.py @@ -1,88 +1,88 @@ -from numbers import Number - -import numpy as np -import pandas as pd -from beartype import beartype -from beartype.typing import Optional, Sequence - -from eis_toolkit.exceptions import InvalidColumnException, NumericValueSignException -from eis_toolkit.utilities.aitchison_geometry import _closure -from eis_toolkit.utilities.checks.compositional import check_in_simplex_sample_space -from eis_toolkit.utilities.miscellaneous import rename_columns_by_pattern - - -@beartype -def _alr_transform(df: pd.DataFrame, columns: Sequence[str], denominator_column: str) -> pd.DataFrame: - - ratios = df[columns].div(df[denominator_column], axis=0) - return np.log(ratios) - - -@beartype -def alr_transform( - df: pd.DataFrame, column: Optional[str] = None, keep_denominator_column: bool = False -) -> pd.DataFrame: - """ - Perform an additive logratio transformation on the data. - - Args: - df: A dataframe of compositional data. - column: The name of the column to be used as the denominator column. - keep_denominator_column: Whether to include the denominator column in the result. If True, the returned - dataframe retains its original shape. - - Returns: - A new dataframe containing the ALR transformed data. - - Raises: - InvalidColumnException: The input column isn't found in the dataframe. - InvalidCompositionException: Data is not normalized to the expected value. - NumericValueSignException: Data contains zeros or negative values. 
- """ - check_in_simplex_sample_space(df) - - if column is not None and column not in df.columns: - raise InvalidColumnException(f"The column {column} was not found in the dataframe.") - - column = column if column is not None else df.columns[-1] - - columns = [col for col in df.columns] - - if not keep_denominator_column and column in columns: - columns.remove(column) - - return rename_columns_by_pattern(_alr_transform(df, columns, column)) - - -@beartype -def _inverse_alr(df: pd.DataFrame, denominator_column: str, scale: Number = 1.0) -> pd.DataFrame: - dfc = df.copy() - - if denominator_column not in dfc.columns.values: - # Add the denominator column - dfc[denominator_column] = 0.0 - - return _closure(np.exp(dfc), scale) - - -@beartype -def inverse_alr(df: pd.DataFrame, denominator_column: str, scale: Number = 1.0) -> pd.DataFrame: - """ - Perform the inverse transformation for a set of ALR transformed data. - - Args: - df: A dataframe of ALR transformed compositional data. - denominator_column: The name of the denominator column. - scale: The value to which each composition should be normalized. Eg., if the composition is expressed - as percentages, scale=100. - - Returns: - A dataframe containing the inverse transformed data. - - Raises: - NumericValueSignException: The input scale value is zero or less. 
- """ - if scale <= 0: - raise NumericValueSignException("The scale value should be positive.") - - return _inverse_alr(df, denominator_column, scale) +from numbers import Number + +import numpy as np +import pandas as pd +from beartype import beartype +from beartype.typing import Optional, Sequence + +from eis_toolkit.exceptions import InvalidColumnException, NumericValueSignException +from eis_toolkit.utilities.aitchison_geometry import _closure +from eis_toolkit.utilities.checks.compositional import check_compositional_data +from eis_toolkit.utilities.miscellaneous import rename_columns_by_pattern + + +@beartype +def _alr_transform(df: pd.DataFrame, columns: Sequence[str], denominator_column: str) -> pd.DataFrame: + + ratios = df[columns].div(df[denominator_column], axis=0) + return np.log(ratios) + + +@beartype +def alr_transform( + df: pd.DataFrame, column: Optional[str] = None, keep_denominator_column: bool = False +) -> pd.DataFrame: + """ + Perform an additive logratio transformation on the data. + + Args: + df: A dataframe of compositional data. + column: The name of the column to be used as the denominator column. + keep_denominator_column: Whether to include the denominator column in the result. If True, the returned + dataframe retains its original shape. + + Returns: + A new dataframe containing the ALR transformed data. + + Raises: + InvalidColumnException: The input column isn't found in the dataframe. + InvalidCompositionException: Data is not normalized to the expected value. + NumericValueSignException: Data contains zeros or negative values. 
+ """ + check_compositional_data(df) + + if column is not None and column not in df.columns: + raise InvalidColumnException(f"The column {column} was not found in the dataframe.") + + column = column if column is not None else df.columns[-1] + + columns = [col for col in df.columns] + + if not keep_denominator_column and column in columns: + columns.remove(column) + + return rename_columns_by_pattern(_alr_transform(df, columns, column)) + + +@beartype +def _inverse_alr(df: pd.DataFrame, denominator_column: str, scale: Number = 1.0) -> pd.DataFrame: + dfc = df.copy() + + if denominator_column not in dfc.columns.values: + # Add the denominator column + dfc[denominator_column] = 0.0 + + return _closure(np.exp(dfc), scale) + + +@beartype +def inverse_alr(df: pd.DataFrame, denominator_column: str, scale: Number = 1.0) -> pd.DataFrame: + """ + Perform the inverse transformation for a set of ALR transformed data. + + Args: + df: A dataframe of ALR transformed compositional data. + denominator_column: The name of the denominator column. + scale: The value to which each composition should be normalized. Eg., if the composition is expressed + as percentages, scale=100. + + Returns: + A dataframe containing the inverse transformed data. + + Raises: + NumericValueSignException: The input scale value is zero or less. 
+ """ + if scale <= 0: + raise NumericValueSignException("The scale value should be positive.") + + return _inverse_alr(df, denominator_column, scale) diff --git a/eis_toolkit/transformations/coda/clr.py b/eis_toolkit/transformations/coda/clr.py index a6022b4f..d5a82215 100644 --- a/eis_toolkit/transformations/coda/clr.py +++ b/eis_toolkit/transformations/coda/clr.py @@ -1,79 +1,79 @@ -from numbers import Number - -import numpy as np -import pandas as pd -from beartype import beartype -from beartype.typing import Optional, Sequence -from scipy.stats import gmean - -from eis_toolkit.exceptions import NumericValueSignException -from eis_toolkit.utilities.aitchison_geometry import _closure -from eis_toolkit.utilities.checks.compositional import check_in_simplex_sample_space -from eis_toolkit.utilities.miscellaneous import rename_columns, rename_columns_by_pattern - - -@beartype -def _centered_ratio(row: pd.Series) -> pd.Series: - - return row / gmean(row) - - -@beartype -def _clr_transform(df: pd.DataFrame) -> pd.DataFrame: - - dfc = df.copy() - dfc = dfc.apply(_centered_ratio, axis=1) - - return np.log(dfc) - - -@beartype -def clr_transform(df: pd.DataFrame) -> pd.DataFrame: - """ - Perform a centered logratio transformation on the data. - - Args: - df: A dataframe of compositional data. - - Returns: - A new dataframe containing the CLR transformed data. - - Raises: - InvalidCompositionException: Data is not normalized to the expected value. - NumericValueSignException: Data contains zeros or negative values. 
- """ - check_in_simplex_sample_space(df) - return rename_columns_by_pattern(_clr_transform(df)) - - -@beartype -def _inverse_clr(df: pd.DataFrame, colnames: Optional[Sequence[str]] = None, scale: Number = 1.0) -> pd.DataFrame: - inverse = _closure(np.exp(df), scale) - - if colnames is not None: - return rename_columns(inverse, colnames) - - return inverse - - -@beartype -def inverse_clr(df: pd.DataFrame, colnames: Optional[Sequence[str]] = None, scale: Number = 1.0) -> pd.DataFrame: - """ - Perform the inverse transformation for a set of CLR transformed data. - - Args: - df: A dataframe of CLR transformed compositional data. - colnames: List of column names to rename the columns to. - scale: The value to which each composition should be normalized. Eg., if the composition is expressed - as percentages, scale=100. - - Returns: - A dataframe containing the inverse transformed data. - - Raises: - NumericValueSignException: The input scale value is zero or less. - """ - if scale <= 0: - raise NumericValueSignException("The scale value should be positive.") - - return _inverse_clr(df, colnames, scale) +from numbers import Number + +import numpy as np +import pandas as pd +from beartype import beartype +from beartype.typing import Optional, Sequence +from scipy.stats import gmean + +from eis_toolkit.exceptions import NumericValueSignException +from eis_toolkit.utilities.aitchison_geometry import _closure +from eis_toolkit.utilities.checks.compositional import check_compositional_data +from eis_toolkit.utilities.miscellaneous import rename_columns, rename_columns_by_pattern + + +@beartype +def _centered_ratio(row: pd.Series) -> pd.Series: + + return row / gmean(row) + + +@beartype +def _clr_transform(df: pd.DataFrame) -> pd.DataFrame: + + dfc = df.copy() + dfc = dfc.apply(_centered_ratio, axis=1) + + return np.log(dfc) + + +@beartype +def clr_transform(df: pd.DataFrame) -> pd.DataFrame: + """ + Perform a centered logratio transformation on the data. 
+ + Args: + df: A dataframe of compositional data. + + Returns: + A new dataframe containing the CLR transformed data. + + Raises: + InvalidCompositionException: Data is not normalized to the expected value. + NumericValueSignException: Data contains zeros or negative values. + """ + check_compositional_data(df) + return rename_columns_by_pattern(_clr_transform(df)) + + +@beartype +def _inverse_clr(df: pd.DataFrame, colnames: Optional[Sequence[str]] = None, scale: Number = 1.0) -> pd.DataFrame: + inverse = _closure(np.exp(df), scale) + + if colnames is not None: + return rename_columns(inverse, colnames) + + return inverse + + +@beartype +def inverse_clr(df: pd.DataFrame, colnames: Optional[Sequence[str]] = None, scale: Number = 1.0) -> pd.DataFrame: + """ + Perform the inverse transformation for a set of CLR transformed data. + + Args: + df: A dataframe of CLR transformed compositional data. + colnames: List of column names to rename the columns to. + scale: The value to which each composition should be normalized. Eg., if the composition is expressed + as percentages, scale=100. + + Returns: + A dataframe containing the inverse transformed data. + + Raises: + NumericValueSignException: The input scale value is zero or less. 
+ """ + if scale <= 0: + raise NumericValueSignException("The scale value should be positive.") + + return _inverse_clr(df, colnames, scale) diff --git a/eis_toolkit/transformations/coda/ilr.py b/eis_toolkit/transformations/coda/ilr.py index ed8831f1..421ca99d 100644 --- a/eis_toolkit/transformations/coda/ilr.py +++ b/eis_toolkit/transformations/coda/ilr.py @@ -1,100 +1,100 @@ -import numpy as np -import pandas as pd -from beartype import beartype -from beartype.typing import Sequence -from scipy.stats import gmean - -from eis_toolkit.exceptions import InvalidColumnException, InvalidCompositionException, InvalidParameterValueException -from eis_toolkit.utilities.checks.compositional import check_in_simplex_sample_space -from eis_toolkit.utilities.checks.dataframe import check_columns_valid -from eis_toolkit.utilities.checks.parameter import check_lists_overlap, check_numeric_value_sign - - -@beartype -def _calculate_ilr_scaling_factor(c1: int, c2: int) -> np.float64: - """ - Calculate the scaling factor for the ILR transform. - - Args: - c1: The cardinality of the first subcomposition. - c2: The cardinality of the second subcomposition. - - Returns: - The scaling factor. - - Raises: - InvalidParameterValueException: One or both of the input values are zero or negative. 
- """ - if not (check_numeric_value_sign(c1) and check_numeric_value_sign(c2)): - raise InvalidParameterValueException("Input values must both be positive integers.") - - return np.sqrt((c1 * c2) / np.float64(c1 + c2)) - - -@beartype -def _geometric_mean_logratio( - row: pd.Series, subcomposition_1: Sequence[str], subcomposition_2: Sequence[str] -) -> np.float64: - - numerator = gmean(row[subcomposition_1]) - denominator = gmean(row[subcomposition_2]) - return np.log(numerator / denominator) - - -@beartype -def _single_ilr_transform( - df: pd.DataFrame, subcomposition_1: Sequence[str], subcomposition_2: Sequence[str] -) -> pd.Series: - - dfc = df.copy() - - c1 = len(subcomposition_1) - c2 = len(subcomposition_2) - - # A Series to hold the transformed rows - ilr_values = pd.Series([0.0] * df.shape[0]) - - for idx, row in dfc.iterrows(): - ilr_values[idx] = _geometric_mean_logratio(row, subcomposition_1, subcomposition_2) - - ilr_values = _calculate_ilr_scaling_factor(c1, c2) * ilr_values - - return ilr_values - - -@beartype -def single_ilr_transform( - df: pd.DataFrame, subcomposition_1: Sequence[str], subcomposition_2: Sequence[str] -) -> pd.Series: - """ - Perform a single isometric logratio transformation on the provided subcompositions. - - Returns ILR balances. Column order matters. - - Args: - df: A dataframe of shape [N, D] of compositional data. - subcomposition_1: Names of the columns in the numerator part of the ratio. - subcomposition_2: Names of the columns in the denominator part of the ratio. - - Returns: - A series of length N containing the transforms. - - Raises: - InvalidColumnException: One or more subcomposition columns are not found in the input dataframe. - InvalidCompositionException: Data is not normalized to the expected value or - one or more columns are found in both subcompositions. - InvalidParameterValueException: At least one subcomposition provided was empty. - NumericValueSignException: Data contains zeros or negative values. 
- """ - check_in_simplex_sample_space(df) - - if not (subcomposition_1 and subcomposition_2): - raise InvalidParameterValueException("A subcomposition should contain at least one column.") - - if not (check_columns_valid(df, subcomposition_1) and check_columns_valid(df, subcomposition_2)): - raise InvalidColumnException("Not all of the input columns were found in the input dataframe.") - - if check_lists_overlap(subcomposition_1, subcomposition_2): - raise InvalidCompositionException("The subcompositions overlap.") - - return _single_ilr_transform(df, subcomposition_1, subcomposition_2) +import numpy as np +import pandas as pd +from beartype import beartype +from beartype.typing import Sequence +from scipy.stats import gmean + +from eis_toolkit.exceptions import InvalidColumnException, InvalidCompositionException, InvalidParameterValueException +from eis_toolkit.utilities.checks.compositional import check_compositional_data +from eis_toolkit.utilities.checks.dataframe import check_columns_valid +from eis_toolkit.utilities.checks.parameter import check_lists_overlap, check_numeric_value_sign + + +@beartype +def _calculate_ilr_scaling_factor(c1: int, c2: int) -> np.float64: + """ + Calculate the scaling factor for the ILR transform. + + Args: + c1: The cardinality of the first subcomposition. + c2: The cardinality of the second subcomposition. + + Returns: + The scaling factor. + + Raises: + InvalidParameterValueException: One or both of the input values are zero or negative. 
+ """ + if not (check_numeric_value_sign(c1) and check_numeric_value_sign(c2)): + raise InvalidParameterValueException("Input values must both be positive integers.") + + return np.sqrt((c1 * c2) / np.float64(c1 + c2)) + + +@beartype +def _geometric_mean_logratio( + row: pd.Series, subcomposition_1: Sequence[str], subcomposition_2: Sequence[str] +) -> np.float64: + + numerator = gmean(row[subcomposition_1]) + denominator = gmean(row[subcomposition_2]) + return np.log(numerator / denominator) + + +@beartype +def _single_ilr_transform( + df: pd.DataFrame, subcomposition_1: Sequence[str], subcomposition_2: Sequence[str] +) -> pd.Series: + + dfc = df.copy() + + c1 = len(subcomposition_1) + c2 = len(subcomposition_2) + + # A Series to hold the transformed rows + ilr_values = pd.Series([0.0] * df.shape[0]) + + for idx, row in dfc.iterrows(): + ilr_values[idx] = _geometric_mean_logratio(row, subcomposition_1, subcomposition_2) + + ilr_values = _calculate_ilr_scaling_factor(c1, c2) * ilr_values + + return ilr_values + + +@beartype +def single_ilr_transform( + df: pd.DataFrame, subcomposition_1: Sequence[str], subcomposition_2: Sequence[str] +) -> pd.Series: + """ + Perform a single isometric logratio transformation on the provided subcompositions. + + Returns ILR balances. Column order matters. + + Args: + df: A dataframe of shape [N, D] of compositional data. + subcomposition_1: Names of the columns in the numerator part of the ratio. + subcomposition_2: Names of the columns in the denominator part of the ratio. + + Returns: + A series of length N containing the transforms. + + Raises: + InvalidColumnException: One or more subcomposition columns are not found in the input dataframe. + InvalidCompositionException: Data is not normalized to the expected value or + one or more columns are found in both subcompositions. + InvalidParameterValueException: At least one subcomposition provided was empty. + NumericValueSignException: Data contains zeros or negative values. 
+ """ + check_compositional_data(df) + + if not (subcomposition_1 and subcomposition_2): + raise InvalidParameterValueException("A subcomposition should contain at least one column.") + + if not (check_columns_valid(df, subcomposition_1) and check_columns_valid(df, subcomposition_2)): + raise InvalidColumnException("Not all of the input columns were found in the input dataframe.") + + if check_lists_overlap(subcomposition_1, subcomposition_2): + raise InvalidCompositionException("The subcompositions overlap.") + + return _single_ilr_transform(df, subcomposition_1, subcomposition_2) diff --git a/eis_toolkit/transformations/coda/pairwise.py b/eis_toolkit/transformations/coda/pairwise.py index 593517b8..c64d195f 100644 --- a/eis_toolkit/transformations/coda/pairwise.py +++ b/eis_toolkit/transformations/coda/pairwise.py @@ -1,73 +1,73 @@ -from numbers import Number - -import numpy as np -import pandas as pd -from beartype import beartype - -from eis_toolkit.exceptions import InvalidColumnException, InvalidParameterValueException -from eis_toolkit.utilities.checks.dataframe import check_dataframe_contains_zeros - - -@beartype -def _single_pairwise_logratio(numerator: Number, denominator: Number) -> np.float64: - - return np.log(numerator / float(denominator)) - - -@beartype -def single_pairwise_logratio(numerator: Number, denominator: Number) -> np.float64: - """ - Perform a pairwise logratio transformation on the given values. - - Args: - numerator: The numerator in the ratio. - denominator: The denominator in the ratio. - - Returns: - The transformed value. - - Raises: - InvalidParameterValueException: One or both input values are zero. 
- """ - if numerator == 0 or denominator == 0: - raise InvalidParameterValueException("Input values cannot be zero.") - - return _single_pairwise_logratio(numerator, denominator) - - -@beartype -def _pairwise_logratio(df: pd.DataFrame, numerator_column: str, denominator_column: str) -> pd.Series: - dfc = df.copy() - - result = pd.Series([0.0] * df.shape[0]) - - for idx, row in dfc.iterrows(): - result[idx] = single_pairwise_logratio(row[numerator_column], row[denominator_column]) - - return result - - -@beartype -def pairwise_logratio(df: pd.DataFrame, numerator_column: str, denominator_column: str) -> pd.Series: - """ - Perform a pairwise logratio transformation on the given columns. - - Args: - df: The dataframe containing the columns to use in the transformation. - numerator_column: The name of the column to use as the numerator column. - denominator_column: The name of the column to use as the denominator. - - Returns: - A series containing the transformed values. - - Raises: - InvalidColumnException: One or both of the input columns are not found in the dataframe. - InvalidParameterValueException: The input columns contain at least one zero value. 
- """ - if numerator_column not in df.columns or denominator_column not in df.columns: - raise InvalidColumnException("At least one input column is not found in the dataframe.") - - if check_dataframe_contains_zeros(df[[numerator_column, denominator_column]]): - raise InvalidParameterValueException("The input columns contain at least one zero value.") - - return _pairwise_logratio(df, numerator_column, denominator_column) +from numbers import Number + +import numpy as np +import pandas as pd +from beartype import beartype + +from eis_toolkit.exceptions import InvalidColumnException, InvalidParameterValueException +from eis_toolkit.utilities.checks.dataframe import check_dataframe_contains_zeros + + +@beartype +def _single_pairwise_logratio(numerator: Number, denominator: Number) -> np.float64: + + return np.log(numerator / float(denominator)) + + +@beartype +def single_pairwise_logratio(numerator: Number, denominator: Number) -> np.float64: + """ + Perform a pairwise logratio transformation on the given values. + + Args: + numerator: The numerator in the ratio. + denominator: The denominator in the ratio. + + Returns: + The transformed value. + + Raises: + InvalidParameterValueException: One or both input values are zero. + """ + if numerator == 0 or denominator == 0: + raise InvalidParameterValueException("Input values cannot be zero.") + + return _single_pairwise_logratio(numerator, denominator) + + +@beartype +def _pairwise_logratio(df: pd.DataFrame, numerator_column: str, denominator_column: str) -> pd.Series: + dfc = df.copy() + + result = pd.Series([0.0] * df.shape[0]) + + for idx, row in dfc.iterrows(): + result[idx] = single_pairwise_logratio(row[numerator_column], row[denominator_column]) + + return result + + +@beartype +def pairwise_logratio(df: pd.DataFrame, numerator_column: str, denominator_column: str) -> pd.Series: + """ + Perform a pairwise logratio transformation on the given columns. 
+ + Args: + df: The dataframe containing the columns to use in the transformation. + numerator_column: The name of the column to use as the numerator column. + denominator_column: The name of the column to use as the denominator. + + Returns: + A series containing the transformed values. + + Raises: + InvalidColumnException: One or both of the input columns are not found in the dataframe. + InvalidParameterValueException: The input columns contain at least one zero value. + """ + if numerator_column not in df.columns or denominator_column not in df.columns: + raise InvalidColumnException("At least one input column is not found in the dataframe.") + + if check_dataframe_contains_zeros(df[[numerator_column, denominator_column]]): + raise InvalidParameterValueException("The input columns contain at least one zero value.") + + return _pairwise_logratio(df, numerator_column, denominator_column) diff --git a/eis_toolkit/transformations/coda/plr.py b/eis_toolkit/transformations/coda/plr.py index 3b58cca0..d0721db0 100644 --- a/eis_toolkit/transformations/coda/plr.py +++ b/eis_toolkit/transformations/coda/plr.py @@ -1,126 +1,126 @@ -import numpy as np -import pandas as pd -from beartype import beartype -from scipy.stats import gmean - -from eis_toolkit.exceptions import InvalidColumnException, InvalidParameterValueException -from eis_toolkit.utilities.checks.compositional import check_in_simplex_sample_space -from eis_toolkit.utilities.checks.parameter import check_numeric_value_sign -from eis_toolkit.utilities.miscellaneous import rename_columns_by_pattern - - -@beartype -def _calculate_plr_scaling_factor(c: int) -> np.float64: - """ - Calculate the scaling factor for the PLR transform. - - Args: - c: The cardinality of the remaining parts in the composition. - - Returns: - The scaling factor used performing a single PLR transform for a composition. - - Raises: - InvalidParameterValueException: The input value is zero or negative. 
- """ - if not (check_numeric_value_sign(c)): - raise InvalidParameterValueException("The input value must be a positive integer.") - - return np.sqrt(c / np.float64(1 + c)) - - -@beartype -def _single_plr_transform_by_index(df: pd.DataFrame, column_ind: int) -> pd.Series: - - dfc = df.copy() - # The denominator is a subcomposition of all the parts "to the right" of the column: - columns = [col for col in df.columns] - subcomposition = [columns[i] for i in range(len(columns)) if i > column_ind] - c = len(subcomposition) - scaling_factor = _calculate_plr_scaling_factor(c) - - # A series to hold the transformed rows - plr_values = pd.Series([0.0] * df.shape[0]) - - for idx, row in dfc.iterrows(): - plr_values[idx] = scaling_factor * np.log(row.iloc[column_ind] / gmean(row[subcomposition])) - - return plr_values - - -@beartype -def _single_plr_transform(df: pd.DataFrame, column: str) -> pd.Series: - - idx = df.columns.get_loc(column) - - return _single_plr_transform_by_index(df, idx) - - -@beartype -def single_plr_transform(df: pd.DataFrame, column: str) -> pd.Series: - """ - Perform a pivot logratio transformation on the selected column. - - Pivot logratio is a special case of ILR, where the numerator in the ratio is always a single - part and the denominator all of the parts to the right in the ordered list of parts. - - Column order matters. - - Args: - df: A dataframe of shape [N, D] of compositional data. - column: The name of the numerator column to use for the transformation. - - Returns: - A series of length N containing the transforms. - - Raises: - InvalidColumnException: The input column isn't found in the dataframe, or there are no columns - to the right of the given column. - InvalidCompositionException: Data is not normalized to the expected value. - NumericValueSignException: Data contains zeros or negative values. 
- """ - check_in_simplex_sample_space(df) - - if column not in df.columns: - raise InvalidColumnException(f"The column {column} was not found in the dataframe.") - - idx = df.columns.get_loc(column) - - if idx == len(df.columns) - 1: - raise InvalidColumnException() - - return _single_plr_transform(df, column) - - -@beartype -def _plr_transform(df: pd.DataFrame) -> pd.DataFrame: - dfc = df.copy() - - # A dataframe to hold the transformed values - plr_values = pd.DataFrame(0.0, index=dfc.index, columns=dfc.columns[:-1]) - - for i in range(len(df.columns) - 1): - plr_values.iloc[:, i] = _single_plr_transform_by_index(dfc, i) - - return plr_values - - -@beartype -def plr_transform(df: pd.DataFrame) -> pd.DataFrame: - """ - Perform a pivot logratio transformation on the dataframe, returning the full set of transforms. - - Args: - df: A dataframe of shape [N, D] of compositional data. - - Returns: - A dataframe of shape [N, D-1] containing the set of PLR transformed data. - - Raises: - InvalidColumnException: The data contains one or more zeros. - InvalidCompositionException: Data is not normalized to the expected value. - NumericValueSignException: Data contains zeros or negative values. - """ - check_in_simplex_sample_space(df) - - return rename_columns_by_pattern(_plr_transform(df)) +import numpy as np +import pandas as pd +from beartype import beartype +from scipy.stats import gmean + +from eis_toolkit.exceptions import InvalidColumnException, InvalidParameterValueException +from eis_toolkit.utilities.checks.compositional import check_compositional_data +from eis_toolkit.utilities.checks.parameter import check_numeric_value_sign +from eis_toolkit.utilities.miscellaneous import rename_columns_by_pattern + + +@beartype +def _calculate_plr_scaling_factor(c: int) -> np.float64: + """ + Calculate the scaling factor for the PLR transform. + + Args: + c: The cardinality of the remaining parts in the composition. 
+ + Returns: + The scaling factor used performing a single PLR transform for a composition. + + Raises: + InvalidParameterValueException: The input value is zero or negative. + """ + if not (check_numeric_value_sign(c)): + raise InvalidParameterValueException("The input value must be a positive integer.") + + return np.sqrt(c / np.float64(1 + c)) + + +@beartype +def _single_plr_transform_by_index(df: pd.DataFrame, column_ind: int) -> pd.Series: + + dfc = df.copy() + # The denominator is a subcomposition of all the parts "to the right" of the column: + columns = [col for col in df.columns] + subcomposition = [columns[i] for i in range(len(columns)) if i > column_ind] + c = len(subcomposition) + scaling_factor = _calculate_plr_scaling_factor(c) + + # A series to hold the transformed rows + plr_values = pd.Series([0.0] * df.shape[0]) + + for idx, row in dfc.iterrows(): + plr_values[idx] = scaling_factor * np.log(row.iloc[column_ind] / gmean(row[subcomposition])) + + return plr_values + + +@beartype +def _single_plr_transform(df: pd.DataFrame, column: str) -> pd.Series: + + idx = df.columns.get_loc(column) + + return _single_plr_transform_by_index(df, idx) + + +@beartype +def single_plr_transform(df: pd.DataFrame, column: str) -> pd.Series: + """ + Perform a pivot logratio transformation on the selected column. + + Pivot logratio is a special case of ILR, where the numerator in the ratio is always a single + part and the denominator all of the parts to the right in the ordered list of parts. + + Column order matters. + + Args: + df: A dataframe of shape [N, D] of compositional data. + column: The name of the numerator column to use for the transformation. + + Returns: + A series of length N containing the transforms. + + Raises: + InvalidColumnException: The input column isn't found in the dataframe, or there are no columns + to the right of the given column. + InvalidCompositionException: Data is not normalized to the expected value. 
+ NumericValueSignException: Data contains zeros or negative values. + """ + check_compositional_data(df) + + if column not in df.columns: + raise InvalidColumnException(f"The column {column} was not found in the dataframe.") + + idx = df.columns.get_loc(column) + + if idx == len(df.columns) - 1: + raise InvalidColumnException() + + return _single_plr_transform(df, column) + + +@beartype +def _plr_transform(df: pd.DataFrame) -> pd.DataFrame: + dfc = df.copy() + + # A dataframe to hold the transformed values + plr_values = pd.DataFrame(0.0, index=dfc.index, columns=dfc.columns[:-1]) + + for i in range(len(df.columns) - 1): + plr_values.iloc[:, i] = _single_plr_transform_by_index(dfc, i) + + return plr_values + + +@beartype +def plr_transform(df: pd.DataFrame) -> pd.DataFrame: + """ + Perform a pivot logratio transformation on the dataframe, returning the full set of transforms. + + Args: + df: A dataframe of shape [N, D] of compositional data. + + Returns: + A dataframe of shape [N, D-1] containing the set of PLR transformed data. + + Raises: + InvalidColumnException: The data contains one or more zeros. + InvalidCompositionException: Data is not normalized to the expected value. + NumericValueSignException: Data contains zeros or negative values. + """ + check_compositional_data(df) + + return rename_columns_by_pattern(_plr_transform(df)) diff --git a/eis_toolkit/utilities/aitchison_geometry.py b/eis_toolkit/utilities/aitchison_geometry.py index 80796637..ca2b03e8 100644 --- a/eis_toolkit/utilities/aitchison_geometry.py +++ b/eis_toolkit/utilities/aitchison_geometry.py @@ -1,45 +1,47 @@ -from numbers import Number - -import numpy as np -import pandas as pd -from beartype import beartype -from beartype.typing import Optional - - -@beartype -def _normalize(row: pd.Series, sum_value: Number = 1.0) -> pd.Series: - """ - Normalize the series to a given value. - - If no value is provided, normalize to 1. - - Args: - row: The series to normalize. 
- - Returns: - A series containing the normalized values. - """ - scale = np.float64(np.sum(row)) / sum_value - return np.divide(row, scale) - - -@beartype -def _closure(df: pd.DataFrame, scale: Optional[Number] = None) -> pd.DataFrame: - """ - Perform the closure operation on the dataframe. - - Assumes the standard simplex, in which the sum of the components of each composition vector is 1. - - Args: - df: A dataframe of shape (N, D) compositional data. - - Returns: - A new dataframe of shape (N, D) where each row has been normalized to 1. - """ - - dfc = df.copy() - - for idx, row in df.iterrows(): - dfc.iloc[idx] = _normalize(row, scale) if scale is not None else _normalize(row) - - return dfc +from numbers import Number + +import numpy as np +import pandas as pd +from beartype import beartype +from beartype.typing import Optional + + +@beartype +def _normalize(row: pd.Series, sum_value: Number = 1.0) -> pd.Series: + """ + Normalize the series to a given value. + + If no value is provided, normalize to 1. + + Args: + row: The series to normalize. + + Returns: + A series containing the normalized values. + """ + scale = np.float64(np.sum(row)) / sum_value + return np.divide(row, scale) + + +@beartype +def _closure(df: pd.DataFrame, scale: Optional[Number] = None) -> pd.DataFrame: + """ + Perform the closure operation on the dataframe. + + If a scale value representing the constant sum is not provided, assumes the standard simplex, + in which the sum of the components of each composition vector is 1. + + Args: + df: A dataframe of shape (N, D) compositional data. + scale: The sum to which each data row should result to. Default is 1. + + Returns: + A new dataframe of shape (N, D) where each row has been normalized to the given scale value. 
+ """ + + dfc = df.copy() + + for idx, row in df.iterrows(): + dfc.iloc[idx] = _normalize(row, scale) if scale is not None else _normalize(row) + + return dfc diff --git a/eis_toolkit/utilities/checks/compositional.py b/eis_toolkit/utilities/checks/compositional.py index 41270eca..7b68c51f 100644 --- a/eis_toolkit/utilities/checks/compositional.py +++ b/eis_toolkit/utilities/checks/compositional.py @@ -1,40 +1,51 @@ -import numpy as np -import pandas as pd -from beartype import beartype -from beartype.typing import Optional - -from eis_toolkit.exceptions import InvalidCompositionException, NumericValueSignException -from eis_toolkit.utilities.checks.dataframe import check_dataframe_contains_only_positive_numbers - - -@beartype -def check_in_simplex_sample_space(df: pd.DataFrame, expected_sum: Optional[np.float64] = None) -> None: - """ - Check that the compositions represented by the data rows belong to a simplex sample space. - - Checks that each compositional data point belongs to the set of positive real numbers. - Checks that each composition is normalized to the same value. - - Args: - df: The dataframe to check. - expected_sum: The expected sum of each row. If None, simply checks that the sum of each row is equal. - - Returns: - True if values are valid and the sum of each row is the expected_sum. - - Raises: - InvalidCompositionException: Data is not normalized to the expected value. - NumericValueSignException: Data contains zeros or negative values. 
- """ - if df.isnull().values.any(): - raise InvalidCompositionException("Data contains NaN values.") - - if not check_dataframe_contains_only_positive_numbers(df): - raise NumericValueSignException("Data contains zeros or negative values.") - - df_sum = np.sum(df, axis=1) - expected_sum = expected_sum if expected_sum is not None else df_sum.iloc[0] - if len(df_sum[df_sum.iloc[:] != expected_sum]) != 0: - raise InvalidCompositionException("Not each composition is normalized to the same value.") - - return None +import numpy as np +import pandas as pd +from beartype import beartype +from beartype.typing import Optional + +from eis_toolkit.exceptions import InvalidCompositionException, NumericValueSignException +from eis_toolkit.utilities.checks.dataframe import check_dataframe_contains_only_positive_numbers + + +@beartype +def check_in_simplex_sample_space(df: pd.DataFrame, expected_sum: Optional[np.float64] = None) -> None: + """ + Check that the compositions represented by the data rows belong to a simplex sample space. + + Checks that each composition is normalized to the same value. + + Args: + df: The dataframe to check. + expected_sum: The expected sum of each row. If None, simply checks that the sum of each row is equal. + + Raises: + InvalidCompositionException: Data contains NaN values or is not normalized to the expected value. + NumericValueSignException: Data contains zeros or negative values. + """ + check_compositional_data(df) + + df_sum = np.sum(df, axis=1) + expected_sum = expected_sum if expected_sum is not None else df_sum.iloc[0] + if len(df_sum[df_sum.iloc[:] != expected_sum]) != 0: + raise InvalidCompositionException("Not each composition is normalized to the same value.") + + return None + + +@beartype +def check_compositional_data(df: pd.DataFrame) -> None: + """ + Check that each compositional data point belongs to the set of positive real numbers. + + Args: + df: The dataframe to check. 
+ + Raises: + InvalidCompositionException: Data contains NaN values. + NumericValueSignException: Data contains zeros or negative values. + """ + if df.isnull().values.any(): + raise InvalidCompositionException("Data contains NaN values.") + + if not check_dataframe_contains_only_positive_numbers(df): + raise NumericValueSignException("Data contains zeros or negative values.") diff --git a/notebooks/testing_logratio_transformations.ipynb b/notebooks/testing_logratio_transformations.ipynb index f8f62961..19f377aa 100644 --- a/notebooks/testing_logratio_transformations.ipynb +++ b/notebooks/testing_logratio_transformations.ipynb @@ -20,9 +20,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "/root/.cache/pypoetry/virtualenvs/eis-toolkit-QEzTY9B6-py3.10/lib/python3.10/site-packages/beartype/_util/hint/pep/utilpeptest.py:347: BeartypeDecorHintPep585DeprecationWarning: PEP 484 type hint typing.Sequence[str] deprecated by PEP 585. This hint is scheduled for removal in the first Python version released after October 5th, 2025. To resolve this, import this hint from \"beartype.typing\" rather than \"typing\". For further commentary and alternatives, see also:\n", - " https://beartype.readthedocs.io/en/latest/api_roar/#pep-585-deprecations\n", - " warn(\n" + "/root/.cache/pypoetry/virtualenvs/eis-toolkit-QEzTY9B6-py3.10/lib/python3.10/site-packages/geopandas/_compat.py:112: UserWarning: The Shapely GEOS version (3.10.3-CAPI-1.16.1) is incompatible with the GEOS version PyGEOS was compiled with (3.10.4-CAPI-1.16.2). 
Conversions between both will be slow.\n", + " warnings.warn(\n" ] } ], @@ -727,7 +726,7 @@ { "cell_type": "code", "execution_count": 21, - "id": "e1bda63b-ab9b-4060-90d5-7520952f2e3a", + "id": "41bfcb78-bdfa-4c03-a9b4-45c2258c60a8", "metadata": { "tags": [] }, @@ -757,7 +756,6 @@ " Ca_ppm_511\n", " Fe_ppm_511\n", " Mg_ppm_511\n", - " residual\n", " \n", " \n", " \n", @@ -767,7 +765,6 @@ " 40200.0\n", " 83200.0\n", " 17200.0\n", - " 831800.0\n", " \n", " \n", " 1\n", @@ -775,7 +772,6 @@ " 5000.0\n", " 28300.0\n", " 7520.0\n", - " 945080.0\n", " \n", " \n", " 2\n", @@ -783,7 +779,6 @@ " 3070.0\n", " 14500.0\n", " 4540.0\n", - " 970010.0\n", " \n", " \n", " 3\n", @@ -791,7 +786,6 @@ " 3290.0\n", " 14600.0\n", " 3240.0\n", - " 971570.0\n", " \n", " \n", " 4\n", @@ -799,19 +793,18 @@ " 3600.0\n", " 31500.0\n", " 8020.0\n", - " 944380.0\n", " \n", " \n", "\n", "" ], "text/plain": [ - " Al_ppm_511 Ca_ppm_511 Fe_ppm_511 Mg_ppm_511 residual\n", - "0 27600.0 40200.0 83200.0 17200.0 831800.0\n", - "1 14100.0 5000.0 28300.0 7520.0 945080.0\n", - "2 7880.0 3070.0 14500.0 4540.0 970010.0\n", - "3 7300.0 3290.0 14600.0 3240.0 971570.0\n", - "4 12500.0 3600.0 31500.0 8020.0 944380.0" + " Al_ppm_511 Ca_ppm_511 Fe_ppm_511 Mg_ppm_511\n", + "0 27600.0 40200.0 83200.0 17200.0\n", + "1 14100.0 5000.0 28300.0 7520.0\n", + "2 7880.0 3070.0 14500.0 4540.0\n", + "3 7300.0 3290.0 14600.0 3240.0\n", + "4 12500.0 3600.0 31500.0 8020.0" ] }, "execution_count": 21, @@ -825,15 +818,12 @@ "df = gpd.read_file(GEOCHEMICAL_DATA, include_fields=elements_to_analyze)\n", "df = pd.DataFrame(df.drop(columns='geometry'))\n", "\n", - "# Add a column for the residual\n", - "\n", - "df[\"residual\"] = million - np.sum(df, axis=1)\n", "df.head()" ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 22, "id": "75728aa4-5b2e-46b6-9511-1250bf4b13ae", "metadata": { "tags": [] @@ -843,7 +833,6 @@ "pair_Al_Ca = pairwise_logratio(df, \"Al_ppm_511\", \"Ca_ppm_511\")\n", "pair_Fe_Mg = 
pairwise_logratio(df, \"Fe_ppm_511\", \"Mg_ppm_511\")\n", "pair_Mg_Al = pairwise_logratio(df, \"Mg_ppm_511\", \"Al_ppm_511\")\n", - "pair_Mg_res = pairwise_logratio(df, \"Mg_ppm_511\", \"residual\")\n", "\n", "df_alr = alr_transform(df)\n", "df_alr_Mg = alr_transform(df, \"Mg_ppm_511\")\n", @@ -859,7 +848,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 23, "id": "e136d05d-671d-420f-95b9-5f350bc7a94c", "metadata": { "tags": [] @@ -876,7 +865,7 @@ "dtype: float64" ] }, - "execution_count": 25, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -887,7 +876,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 24, "id": "ad352680-433a-4026-b7b5-560b682dfb96", "metadata": { "tags": [] @@ -917,7 +906,6 @@ " V1\n", " V2\n", " V3\n", - " V4\n", " \n", " \n", " \n", @@ -926,50 +914,45 @@ " 0.472906\n", " 0.848958\n", " 1.576338\n", - " 3.878683\n", " \n", " \n", " 1\n", " 0.628609\n", " -0.408128\n", " 1.325296\n", - " 4.833703\n", " \n", " \n", " 2\n", " 0.551401\n", " -0.391249\n", " 1.161222\n", - " 5.364379\n", " \n", " \n", " 3\n", " 0.812301\n", " 0.015314\n", " 1.505448\n", - " 5.703340\n", " \n", " \n", " 4\n", " 0.443790\n", " -0.801005\n", " 1.368049\n", - " 4.768590\n", " \n", " \n", "\n", "" ], "text/plain": [ - " V1 V2 V3 V4\n", - "0 0.472906 0.848958 1.576338 3.878683\n", - "1 0.628609 -0.408128 1.325296 4.833703\n", - "2 0.551401 -0.391249 1.161222 5.364379\n", - "3 0.812301 0.015314 1.505448 5.703340\n", - "4 0.443790 -0.801005 1.368049 4.768590" + " V1 V2 V3\n", + "0 0.472906 0.848958 1.576338\n", + "1 0.628609 -0.408128 1.325296\n", + "2 0.551401 -0.391249 1.161222\n", + "3 0.812301 0.015314 1.505448\n", + "4 0.443790 -0.801005 1.368049" ] }, - "execution_count": 26, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -977,14 +960,6 @@ "source": [ "df_alr_Mg.head()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": 
"8b6a1929-51ef-4b7a-8621-f46bbe337e31", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/tests/utilities/compositional_test.py b/tests/utilities/compositional_test.py index 6aa77d2e..ec655e5c 100644 --- a/tests/utilities/compositional_test.py +++ b/tests/utilities/compositional_test.py @@ -1,99 +1,83 @@ -import numpy as np -import pandas as pd -import pytest - -from eis_toolkit.exceptions import InvalidCompositionException, NumericValueSignException -from eis_toolkit.transformations.coda.alr import alr_transform -from eis_toolkit.transformations.coda.clr import clr_transform -from eis_toolkit.transformations.coda.ilr import single_ilr_transform -from eis_toolkit.transformations.coda.plr import plr_transform, single_plr_transform -from eis_toolkit.utilities.checks.compositional import check_in_simplex_sample_space - - -def test_compositional_data_has_zeros(): - """Test that performing logratio transforms for data containing zeros raises the correct exception.""" - arr = np.array([[80, 0, 5], [75, 18, 7]]) - df = pd.DataFrame(arr, columns=["a", "b", "c"]) - with pytest.raises(NumericValueSignException): - alr_transform(df) - with pytest.raises(NumericValueSignException): - clr_transform(df) - with pytest.raises(NumericValueSignException): - single_ilr_transform(df, ["a"], ["b"]) - with pytest.raises(NumericValueSignException): - plr_transform(df) - with pytest.raises(NumericValueSignException): - single_plr_transform(df, "b") - - -def test_compositional_data_has_negatives(): - """Test that performing logratio transforms for data containing negative values raises the correct exception.""" - arr = np.array([[80, 25, -5], [75, 32, -7]]) - df = pd.DataFrame(arr, columns=["a", "b", "c"]) - with pytest.raises(NumericValueSignException): - alr_transform(df) - with pytest.raises(NumericValueSignException): - clr_transform(df) - with pytest.raises(NumericValueSignException): - single_ilr_transform(df, ["a"], ["b"]) - with 
pytest.raises(NumericValueSignException): - plr_transform(df) - with pytest.raises(NumericValueSignException): - single_plr_transform(df, "b") - - -def test_compositional_data_has_nans(): - """Test that performing logratio transforms for data containing NaN values raises the correct exception.""" - df = pd.DataFrame(np.ones((3, 3)), columns=["a", "b", "c"]) - df.iloc[:, 0] = np.NaN - with pytest.raises(InvalidCompositionException): - alr_transform(df) - with pytest.raises(InvalidCompositionException): - clr_transform(df) - with pytest.raises(InvalidCompositionException): - single_ilr_transform(df, ["a"], ["b"]) - with pytest.raises(InvalidCompositionException): - plr_transform(df) - with pytest.raises(InvalidCompositionException): - single_plr_transform(df, "b") - - -def test_compositional_data_invalid(): - """Test that input data that does not belong to a simplex sample space raises the correct exception.""" - arr = np.array([[1, 1, 1], [2, 2, 2]]) - df = pd.DataFrame(arr, columns=["a", "b", "c"]) - with pytest.raises(InvalidCompositionException): - alr_transform(df) - with pytest.raises(InvalidCompositionException): - clr_transform(df) - with pytest.raises(InvalidCompositionException): - single_ilr_transform(df, ["a"], ["b"]) - with pytest.raises(InvalidCompositionException): - plr_transform(df) - with pytest.raises(InvalidCompositionException): - single_plr_transform(df, "b") - - -def test_check_for_simplex_sample_space(): - """Test whether or not a dataframe belongs to a simplex sample space is correctly identified.""" - unit_simplex_df = pd.DataFrame([[0.1, 0.2, 0.3, 0.4], [0.2, 0.3, 0.2, 0.3]]) - simplex_df = pd.DataFrame([[1, 2, 3, 4], [2, 3, 2, 3]], columns=["a", "b", "c", "d"]) - non_simplex_positive_df = pd.DataFrame([1, 2, 3, 4], [5, 6, 7, 8]) - non_positive_df = pd.DataFrame([-1, 2, 3, 4], [1, 2, 3, 4]) - - with pytest.raises(InvalidCompositionException): - check_in_simplex_sample_space(non_simplex_positive_df) - - with 
pytest.raises(NumericValueSignException): - check_in_simplex_sample_space(non_positive_df) - - with pytest.raises(InvalidCompositionException): - check_in_simplex_sample_space(simplex_df, np.float64(100)) - - # Valid cases - assert no exception is raised - try: - check_in_simplex_sample_space(simplex_df) - check_in_simplex_sample_space(simplex_df, np.float64(10)) - check_in_simplex_sample_space(unit_simplex_df, np.float64(1.0)) - except Exception as ex: - assert False, f"{type(ex)}: {ex}" +import numpy as np +import pandas as pd +import pytest + +from eis_toolkit.exceptions import InvalidCompositionException, NumericValueSignException +from eis_toolkit.transformations.coda.alr import alr_transform +from eis_toolkit.transformations.coda.clr import clr_transform +from eis_toolkit.transformations.coda.ilr import single_ilr_transform +from eis_toolkit.transformations.coda.plr import plr_transform, single_plr_transform +from eis_toolkit.utilities.checks.compositional import check_in_simplex_sample_space + + +def test_compositional_data_has_zeros(): + """Test that performing logratio transforms for data containing zeros raises the correct exception.""" + arr = np.array([[80, 0, 5], [75, 18, 7]]) + df = pd.DataFrame(arr, columns=["a", "b", "c"]) + with pytest.raises(NumericValueSignException): + alr_transform(df) + with pytest.raises(NumericValueSignException): + clr_transform(df) + with pytest.raises(NumericValueSignException): + single_ilr_transform(df, ["a"], ["b"]) + with pytest.raises(NumericValueSignException): + plr_transform(df) + with pytest.raises(NumericValueSignException): + single_plr_transform(df, "b") + + +def test_compositional_data_has_negatives(): + """Test that performing logratio transforms for data containing negative values raises the correct exception.""" + arr = np.array([[80, 25, -5], [75, 32, -7]]) + df = pd.DataFrame(arr, columns=["a", "b", "c"]) + with pytest.raises(NumericValueSignException): + alr_transform(df) + with 
pytest.raises(NumericValueSignException): + clr_transform(df) + with pytest.raises(NumericValueSignException): + single_ilr_transform(df, ["a"], ["b"]) + with pytest.raises(NumericValueSignException): + plr_transform(df) + with pytest.raises(NumericValueSignException): + single_plr_transform(df, "b") + + +def test_compositional_data_has_nans(): + """Test that performing logratio transforms for data containing NaN values raises the correct exception.""" + df = pd.DataFrame(np.ones((3, 3)), columns=["a", "b", "c"]) + df.iloc[:, 0] = np.NaN + with pytest.raises(InvalidCompositionException): + alr_transform(df) + with pytest.raises(InvalidCompositionException): + clr_transform(df) + with pytest.raises(InvalidCompositionException): + single_ilr_transform(df, ["a"], ["b"]) + with pytest.raises(InvalidCompositionException): + plr_transform(df) + with pytest.raises(InvalidCompositionException): + single_plr_transform(df, "b") + + +def test_check_for_simplex_sample_space(): + """Test whether or not a dataframe belongs to a simplex sample space is correctly identified.""" + unit_simplex_df = pd.DataFrame([[0.1, 0.2, 0.3, 0.4], [0.2, 0.3, 0.2, 0.3]]) + simplex_df = pd.DataFrame([[1, 2, 3, 4], [2, 3, 2, 3]], columns=["a", "b", "c", "d"]) + non_simplex_positive_df = pd.DataFrame([1, 2, 3, 4], [5, 6, 7, 8]) + non_positive_df = pd.DataFrame([-1, 2, 3, 4], [1, 2, 3, 4]) + + with pytest.raises(InvalidCompositionException): + check_in_simplex_sample_space(non_simplex_positive_df) + + with pytest.raises(NumericValueSignException): + check_in_simplex_sample_space(non_positive_df) + + with pytest.raises(InvalidCompositionException): + check_in_simplex_sample_space(simplex_df, np.float64(100)) + + # Valid cases - assert no exception is raised + try: + check_in_simplex_sample_space(simplex_df) + check_in_simplex_sample_space(simplex_df, np.float64(10)) + check_in_simplex_sample_space(unit_simplex_df, np.float64(1.0)) + except Exception as ex: + assert False, f"{type(ex)}: {ex}"