diff --git a/popmon/analysis/comparison/hist_comparer.py b/popmon/analysis/comparison/hist_comparer.py index 9dc25b31..c7211534 100644 --- a/popmon/analysis/comparison/hist_comparer.py +++ b/popmon/analysis/comparison/hist_comparer.py @@ -16,7 +16,7 @@ # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - +from typing import Any, Callable, Dict, Optional import numpy as np import pandas as pd @@ -98,42 +98,73 @@ def hist_compare(row, hist_name1="", hist_name2=""): return pd.Series(x) -class HistComparer(Pipeline): - """Base pipeline to compare histogram to previous rolling histograms""" - +class GenericHistComparer(Pipeline): def __init__( self, - func_hist_collector, - read_key, - store_key, - assign_to_key=None, - hist_col="histogram", - suffix="comp", + store_key: str, + hist_col: str, + prefix: str, + assign_to_key: Optional[str] = None, + left_read_key: Optional[str] = None, + right_read_key: Optional[str] = None, + func_left: Optional[Callable] = None, + func_right: Optional[Callable] = None, + suffix1: Optional[str] = None, + suffix2: Optional[str] = None, + left_kwargs: Optional[Dict[Any, Any]] = None, + right_kwargs: Optional[Dict[Any, Any]] = None, *args, **kwargs, ): - """Initialize an instance of RollingHistComparer. + # TODO: add reference type to datastore (along the lines of datastore["references"] = + # {"ref": "Self Reference", "prev1": Rolling Reference (window=1, shift=1)"}) - :param func_hist_collector: histogram collection function - :param str read_key: key of input data to read from data store - :param str store_key: key of output data to store in data store - :param str assign_to_key: key of the input data to assign function applied-output to. (optional) - :param str hist_col: column/key in input df/dict that contains the histogram. default is 'histogram' - :param str suffix: column/key of rolling histogram. default is 'roll' -> column = 'histogram_roll' - :param args: (tuple, optional): residual args passed on to func_mean and func_std - :param kwargs: (dict, optional): residual kwargs passed on to func_mean and func_std - """ if assign_to_key is None: - assign_to_key = read_key + raise ValueError("Ambiguous assign to key") + # assign_to_key = read_key + + if left_kwargs is None: + left_kwargs = {} + + if right_kwargs is None: + right_kwargs = {} + + modules = [] + + # make left reference histograms + hist_name1 = hist_col + if func_left is not None and suffix1 is not None: + hist_name1 = f"{hist_name1}_{suffix1}" + hist_collector1 = ApplyFunc( + apply_to_key=left_read_key, + assign_to_key=assign_to_key, + ) + hist_collector1.add_apply_func( + func=func_left, + entire=True, + suffix=suffix1, + hist_name=hist_col, + **left_kwargs, + ) + modules.append(hist_collector1) + + # right left reference histograms + hist_name2 = hist_col + if func_right is not None and suffix2 is not None: + hist_name2 = f"{hist_name2}_{suffix2}" + hist_collector2 = ApplyFunc( + apply_to_key=right_read_key, + assign_to_key=assign_to_key, + ) + hist_collector2.add_apply_func( + func=func_right, + entire=True, + suffix=suffix2, + hist_name=hist_col, + **right_kwargs, + ) + modules.append(hist_collector2) - # make reference histogram(s) - hist_collector = ApplyFunc( - apply_to_key=read_key, - assign_to_key=assign_to_key, - ) - hist_collector.add_apply_func( - func=func_hist_collector, entire=True, suffix=suffix, *args, **kwargs - ) # do histogram comparison hist_comparer = ApplyFunc( apply_to_key=assign_to_key, @@ -141,15 +172,52 @@ def __init__( apply_funcs=[ { "func": hist_compare, - "hist_name1": hist_col, - "hist_name2": hist_col + "_" + suffix, - "prefix": suffix, + "hist_name1": hist_name1, + "hist_name2": hist_name2, + "prefix": prefix, "axis": 1, } ], ) + modules.append(hist_comparer) - super().__init__(modules=[hist_collector, hist_comparer]) + super().__init__(modules) + + +class HistComparer(GenericHistComparer): + """Base pipeline to compare histogram to previous rolling histograms""" + + def __init__( + self, + func_hist_collector: Callable, + read_key: str, + store_key: str, + hist_col: str, + suffix: str, + assign_to_key: Optional[str] = None, + **kwargs, + ): + """Initialize an instance of RollingHistComparer. + + :param func_hist_collector: histogram collection function + :param str read_key: key of input data to read from data store + :param str store_key: key of output data to store in data store + :param str assign_to_key: key of the input data to assign function applied-output to. (optional) + :param str hist_col: column/key in input df/dict that contains the histogram. default is 'histogram' + :param str suffix: column/key of rolling histogram. default is 'roll' -> column = 'histogram_roll' + :param args: (tuple, optional): residual args passed on to func_mean and func_std + :param kwargs: (dict, optional): residual kwargs passed on to func_mean and func_std + """ + super().__init__( + store_key, + right_read_key=read_key, + hist_col=hist_col, + assign_to_key=assign_to_key, + func_right=func_hist_collector, + right_kwargs=kwargs, + suffix2=suffix, + prefix=suffix, + ) class RollingHistComparer(HistComparer): @@ -177,12 +245,11 @@ def __init__( rolling_hist, read_key, store_key, - read_key, - hist_col, - suffix, + assign_to_key=read_key, + hist_col=hist_col, + suffix=suffix, window=window, shift=shift, - hist_name=hist_col, ) self.read_key = read_key self.window = window @@ -244,11 +311,10 @@ def __init__( expanding_hist, read_key, store_key, - read_key, - hist_col, - suffix, + assign_to_key=read_key, + hist_col=hist_col, + suffix=suffix, shift=shift, - hist_name=hist_col, ) self.read_key = read_key @@ -282,9 +348,9 @@ def __init__( hist_sum, reference_key, store_key, - assign_to_key, - hist_col, - suffix, + assign_to_key=assign_to_key, + hist_col=hist_col, + suffix=suffix, metrics=[hist_col], ) self.reference_key = reference_key @@ -297,6 +363,29 @@ def transform(self, datastore): return super().transform(datastore) +class RollingInputFixedReference(GenericHistComparer): + def __init__( + self, + read_key, + reference_key, + store_key, + assign_to_key=None, + window=1, + shift=1, + hist_col="histogram", + suffix1="roll", + suffix2="ref", + prefix="rollref", + ): + super().__init__( + read_key, # left read key + store_key, + # right_read_key = reference_key, + hist_col=hist_col, + assign_to_key=assign_to_key, + ) + + class NormHistComparer(Pipeline): """Base pipeline to compare histogram to normalized histograms""" diff --git a/popmon/pipeline/metrics_pipelines.py b/popmon/pipeline/metrics_pipelines.py index 6fc2c053..4cbf5d06 100644 --- a/popmon/pipeline/metrics_pipelines.py +++ b/popmon/pipeline/metrics_pipelines.py @@ -102,7 +102,9 @@ def get_splitting_modules( features=features, feature_begins_with=f"{time_axis}:", ), - PreviousHistComparer(read_key="split_hists", store_key="comparisons"), + PreviousHistComparer( + read_key="split_hists", store_key="comparisons", suffix="prev1" + ), HistProfiler(read_key="split_hists", store_key="profiles"), ] return modules @@ -226,6 +228,7 @@ def __init__( reference_key="split_hists", assign_to_key="split_hists", store_key="comparisons", + suffix=reference_prefix, ), RefMedianMadPullCalculator( reference_key="comparisons", @@ -288,6 +291,7 @@ def __init__( reference_key="split_ref_hists", assign_to_key="split_hists", store_key="comparisons", + suffix=reference_prefix, ), HistProfiler(read_key="split_ref_hists", store_key="ref_profiles"), RefMedianMadPullCalculator( @@ -343,6 +347,7 @@ def __init__( window=settings.comparison.window, shift=settings.comparison.shift, store_key="comparisons", + suffix=reference_prefix, ), RefMedianMadPullCalculator( reference_key="comparisons", @@ -399,6 +404,7 @@ def __init__( read_key="split_hists", shift=settings.comparison.shift, store_key="comparisons", + suffix=reference_prefix, ), # 4. profiling of histograms, then pull calculation compared with reference mean and std, # to obtain normalized residuals of profiles