diff --git a/qualle/__init__.py b/qualle/__init__.py index 965d556..5a9d07a 100644 --- a/qualle/__init__.py +++ b/qualle/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/qualle/evaluate.py b/qualle/evaluate.py index 2ef6484..120537c 100644 --- a/qualle/evaluate.py +++ b/qualle/evaluate.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/qualle/features/__init__.py b/qualle/features/__init__.py index 965d556..5a9d07a 100644 --- a/qualle/features/__init__.py +++ b/qualle/features/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/qualle/features/base.py b/qualle/features/base.py index c642a74..dfcb876 100644 --- a/qualle/features/base.py +++ b/qualle/features/base.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/qualle/features/combined.py b/qualle/features/combined.py index 91dc1ad..4eb53b1 100644 --- a/qualle/features/combined.py +++ b/qualle/features/combined.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/qualle/features/confidence.py b/qualle/features/confidence.py index 8620a8e..16ad264 100644 --- a/qualle/features/confidence.py +++ b/qualle/features/confidence.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/qualle/features/label_calibration/__init__.py b/qualle/features/label_calibration/__init__.py index 965d556..5a9d07a 100644 --- a/qualle/features/label_calibration/__init__.py +++ b/qualle/features/label_calibration/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/qualle/features/label_calibration/base.py b/qualle/features/label_calibration/base.py index f287d79..7d9a7b2 100644 --- a/qualle/features/label_calibration/base.py +++ b/qualle/features/label_calibration/base.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/qualle/features/label_calibration/simple_label_calibration.py b/qualle/features/label_calibration/simple_label_calibration.py index 8ba955e..ab6bad6 100644 --- a/qualle/features/label_calibration/simple_label_calibration.py +++ b/qualle/features/label_calibration/simple_label_calibration.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/qualle/features/label_calibration/thesauri_label_calibration.py b/qualle/features/label_calibration/thesauri_label_calibration.py index 0a2d419..f0c7ca8 100644 --- a/qualle/features/label_calibration/thesauri_label_calibration.py +++ b/qualle/features/label_calibration/thesauri_label_calibration.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/qualle/features/text.py b/qualle/features/text.py index 37b81fa..8dd4d2b 100644 --- a/qualle/features/text.py +++ b/qualle/features/text.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/qualle/interface/__init__.py b/qualle/interface/__init__.py index 965d556..5a9d07a 100644 --- a/qualle/interface/__init__.py +++ b/qualle/interface/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/qualle/interface/cli.py b/qualle/interface/cli.py index 539864e..8d27d1b 100644 --- a/qualle/interface/cli.py +++ b/qualle/interface/cli.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/qualle/interface/config.py b/qualle/interface/config.py index 3840a18..a2ac044 100644 --- a/qualle/interface/config.py +++ b/qualle/interface/config.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/qualle/interface/internal.py b/qualle/interface/internal.py index 5da7c20..b6fe2c0 100644 --- a/qualle/interface/internal.py +++ b/qualle/interface/internal.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/qualle/interface/rest.py b/qualle/interface/rest.py index bf1a2b7..633c8ad 100644 --- a/qualle/interface/rest.py +++ b/qualle/interface/rest.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/qualle/label_calibration/__init__.py b/qualle/label_calibration/__init__.py index 965d556..5a9d07a 100644 --- a/qualle/label_calibration/__init__.py +++ b/qualle/label_calibration/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/qualle/label_calibration/category.py b/qualle/label_calibration/category.py index deaeb88..ef8ddcd 100644 --- a/qualle/label_calibration/category.py +++ b/qualle/label_calibration/category.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/qualle/label_calibration/simple.py b/qualle/label_calibration/simple.py index 66cde89..4d429a8 100644 --- a/qualle/label_calibration/simple.py +++ b/qualle/label_calibration/simple.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/qualle/main.py b/qualle/main.py index bdff148..79379d3 100644 --- a/qualle/main.py +++ b/qualle/main.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/qualle/models.py b/qualle/models.py index e346ace..ede2b44 100644 --- a/qualle/models.py +++ b/qualle/models.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/qualle/pipeline.py b/qualle/pipeline.py index 54967e6..74081c7 100644 --- a/qualle/pipeline.py +++ b/qualle/pipeline.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. from contextlib import contextmanager -from typing import List, Callable, Any +from typing import List, Callable, Any, Collection from sklearn.model_selection import cross_val_predict @@ -65,15 +65,67 @@ def train(self, data: TrainData): self._recall_predictor.fit(features_data, true_recall) def predict(self, data: PredictData) -> List[float]: - predicted_no_of_labels = self._label_calibrator.predict(data.docs) - label_calibration_data = LabelCalibrationData( - predicted_labels=data.predicted_labels, - predicted_no_of_labels=predicted_no_of_labels - ) - features_data = self._features_data_mapper( - data, label_calibration_data + zero_idxs = self._get_pdata_idxs_with_zero_labels(data) + data_with_labels = self._get_pdata_with_labels(data, zero_idxs) + if data_with_labels.docs: + predicted_no_of_labels = self._label_calibrator.predict( + data_with_labels.docs + ) + label_calibration_data = LabelCalibrationData( + predicted_labels=data_with_labels.predicted_labels, + predicted_no_of_labels=predicted_no_of_labels, + ) + features_data = self._features_data_mapper( + data_with_labels, label_calibration_data + ) + predicted_recall = self._recall_predictor.predict( + features_data + ) + recall_scores = self._merge_zero_recall_with_predicted_recall( + predicted_recall=predicted_recall, + zero_labels_idx=zero_idxs, + ) + else: + recall_scores = [0] * len(data.predicted_labels) + return recall_scores + + @staticmethod + def _get_pdata_idxs_with_zero_labels(data: PredictData) -> Collection[int]: + return [ + i for i in range(len(data.predicted_labels)) + if not data.predicted_labels[i] + ] + + @staticmethod + def _get_pdata_with_labels( + data: PredictData, zero_labels_idxs: Collection[int] + ) -> PredictData: + non_zero_idxs = [ + i for i in range(len(data.predicted_labels)) + if i not in zero_labels_idxs + ] + return PredictData( + docs=[data.docs[i] for i in non_zero_idxs], + predicted_labels=[data.predicted_labels[i] for i in non_zero_idxs], + scores=[data.scores[i] for i in non_zero_idxs], ) - return self._recall_predictor.predict(features_data) + + @staticmethod + def _merge_zero_recall_with_predicted_recall( + predicted_recall: List[float], + zero_labels_idx: Collection[int], + ): + recall_scores = [] + j = 0 + for i in range( + len(zero_labels_idx) + + len(predicted_recall)): + if i in zero_labels_idx: + recall_scores.append(0) + else: + recall_scores.append(predicted_recall[j]) + j += 1 + return recall_scores @contextmanager def _debug(self, method_name): diff --git a/qualle/quality_estimation.py b/qualle/quality_estimation.py index 99c92c7..26828b4 100644 --- a/qualle/quality_estimation.py +++ b/qualle/quality_estimation.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/qualle/train.py b/qualle/train.py index 77670ea..505918e 100644 --- a/qualle/train.py +++ b/qualle/train.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/qualle/utils.py b/qualle/utils.py index f0e1d41..66173f4 100644 --- a/qualle/utils.py +++ b/qualle/utils.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/__init__.py b/tests/__init__.py index 965d556..5a9d07a 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/common.py b/tests/common.py index 4288dbd..0a04d8f 100644 --- a/tests/common.py +++ b/tests/common.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/conftest.py b/tests/conftest.py index a4c783b..0da8bd0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/features/__init__.py b/tests/features/__init__.py index 965d556..5a9d07a 100644 --- a/tests/features/__init__.py +++ b/tests/features/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/features/label_calibration/__init__.py b/tests/features/label_calibration/__init__.py index 965d556..5a9d07a 100644 --- a/tests/features/label_calibration/__init__.py +++ b/tests/features/label_calibration/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/features/label_calibration/test_simple_label_calibration/__init__.py b/tests/features/label_calibration/test_simple_label_calibration/__init__.py index 965d556..5a9d07a 100644 --- a/tests/features/label_calibration/test_simple_label_calibration/__init__.py +++ b/tests/features/label_calibration/test_simple_label_calibration/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/features/label_calibration/test_simple_label_calibration/test_calibrator.py b/tests/features/label_calibration/test_simple_label_calibration/test_calibrator.py index 56e8083..269f617 100644 --- a/tests/features/label_calibration/test_simple_label_calibration/test_calibrator.py +++ b/tests/features/label_calibration/test_simple_label_calibration/test_calibrator.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/features/label_calibration/test_simple_label_calibration/test_features.py b/tests/features/label_calibration/test_simple_label_calibration/test_features.py index 42c413d..8535012 100644 --- a/tests/features/label_calibration/test_simple_label_calibration/test_features.py +++ b/tests/features/label_calibration/test_simple_label_calibration/test_features.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/features/label_calibration/test_thesauri_label_calibration/__init__.py b/tests/features/label_calibration/test_thesauri_label_calibration/__init__.py index 965d556..5a9d07a 100644 --- a/tests/features/label_calibration/test_thesauri_label_calibration/__init__.py +++ b/tests/features/label_calibration/test_thesauri_label_calibration/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/features/label_calibration/test_thesauri_label_calibration/common.py b/tests/features/label_calibration/test_thesauri_label_calibration/common.py index 8ea1f57..2c968f1 100644 --- a/tests/features/label_calibration/test_thesauri_label_calibration/common.py +++ b/tests/features/label_calibration/test_thesauri_label_calibration/common.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/features/label_calibration/test_thesauri_label_calibration/conftest.py b/tests/features/label_calibration/test_thesauri_label_calibration/conftest.py index c29652c..22277c3 100644 --- a/tests/features/label_calibration/test_thesauri_label_calibration/conftest.py +++ b/tests/features/label_calibration/test_thesauri_label_calibration/conftest.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/features/label_calibration/test_thesauri_label_calibration/test_calibrator.py b/tests/features/label_calibration/test_thesauri_label_calibration/test_calibrator.py index e064c25..76bb955 100644 --- a/tests/features/label_calibration/test_thesauri_label_calibration/test_calibrator.py +++ b/tests/features/label_calibration/test_thesauri_label_calibration/test_calibrator.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/features/label_calibration/test_thesauri_label_calibration/test_features.py b/tests/features/label_calibration/test_thesauri_label_calibration/test_features.py index 3782fa1..db6e322 100644 --- a/tests/features/label_calibration/test_thesauri_label_calibration/test_features.py +++ b/tests/features/label_calibration/test_thesauri_label_calibration/test_features.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/features/label_calibration/test_thesauri_label_calibration/test_transformer.py b/tests/features/label_calibration/test_thesauri_label_calibration/test_transformer.py index 23bfb3f..b62ca74 100644 --- a/tests/features/label_calibration/test_thesauri_label_calibration/test_transformer.py +++ b/tests/features/label_calibration/test_thesauri_label_calibration/test_transformer.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/features/test_combined.py b/tests/features/test_combined.py index 11c7c62..5df330e 100644 --- a/tests/features/test_combined.py +++ b/tests/features/test_combined.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/features/test_confidence.py b/tests/features/test_confidence.py index 1b68bc7..a4159fa 100644 --- a/tests/features/test_confidence.py +++ b/tests/features/test_confidence.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/features/test_text.py b/tests/features/test_text.py index ad3f3a3..ab60ef9 100644 --- a/tests/features/test_text.py +++ b/tests/features/test_text.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/interface/__init__.py b/tests/interface/__init__.py index 965d556..5a9d07a 100644 --- a/tests/interface/__init__.py +++ b/tests/interface/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/interface/common.py b/tests/interface/common.py index c4bbab3..31d6cea 100644 --- a/tests/interface/common.py +++ b/tests/interface/common.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/interface/test_cli.py b/tests/interface/test_cli.py index 9d3f722..cf923b9 100644 --- a/tests/interface/test_cli.py +++ b/tests/interface/test_cli.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/interface/test_internal.py b/tests/interface/test_internal.py index faa37b6..f7040a2 100644 --- a/tests/interface/test_internal.py +++ b/tests/interface/test_internal.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/interface/test_rest.py b/tests/interface/test_rest.py index db21107..d925613 100644 --- a/tests/interface/test_rest.py +++ b/tests/interface/test_rest.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/label_calibration/__init__.py b/tests/label_calibration/__init__.py index 965d556..5a9d07a 100644 --- a/tests/label_calibration/__init__.py +++ b/tests/label_calibration/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/label_calibration/conftest.py b/tests/label_calibration/conftest.py index 030edaa..c3d2415 100644 --- a/tests/label_calibration/conftest.py +++ b/tests/label_calibration/conftest.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/label_calibration/test_category.py b/tests/label_calibration/test_category.py index 5bd4354..b8259e7 100644 --- a/tests/label_calibration/test_category.py +++ b/tests/label_calibration/test_category.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/label_calibration/test_simple.py b/tests/label_calibration/test_simple.py index 8674b3a..19b3b6c 100644 --- a/tests/label_calibration/test_simple.py +++ b/tests/label_calibration/test_simple.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/test_eval.py b/tests/test_eval.py index a45fce0..abe296a 100644 --- a/tests/test_eval.py +++ b/tests/test_eval.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/test_integration.py b/tests/test_integration.py index 07ca830..b428bdc 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 2fe3550..3e282c2 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -58,6 +58,20 @@ def train_data(): ) +@pytest.fixture +def train_data_with_some_empty_labels(train_data): + train_data.predict_data.predicted_labels = [['c'], [], ['c'], [], ['c']] + + return train_data + + +@pytest.fixture +def train_data_with_all_empty_labels(train_data): + train_data.predict_data.predicted_labels = [[]] * 5 + + return train_data + + def test_train(qp, train_data, mocker): calibrator = qp._label_calibrator mocker.spy(calibrator, 'fit') @@ -99,6 +113,24 @@ def test_predict(qp, train_data): assert np.array_equal(qp.predict(p_data), [1] * 5) +def test_predict_with_some_empty_labels_returns_zero_recall( + qp, train_data_with_some_empty_labels): + p_data = train_data_with_some_empty_labels.predict_data + + qp.train(train_data_with_some_empty_labels) + + assert np.array_equal(qp.predict(p_data), [1, 0, 1, 0, 1]) + + +def test_predict_with_all_empty_labels_returns_only_zero_recall( + qp, train_data_with_all_empty_labels): + p_data = train_data_with_all_empty_labels.predict_data + + qp.train(train_data_with_all_empty_labels) + + assert np.array_equal(qp.predict(p_data), [0] * 5) + + def test_debug_prints_time_if_activated(qp, caplog): qp._should_debug = True caplog.set_level(logging.DEBUG) diff --git a/tests/test_qe.py b/tests/test_qe.py index bcf51b8..baf7cc0 100644 --- a/tests/test_qe.py +++ b/tests/test_qe.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/test_train.py b/tests/test_train.py index 3c8eb5f..aa5af5a 100644 --- a/tests/test_train.py +++ b/tests/test_train.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/test_utils.py b/tests/test_utils.py index d28ca67..aee0a6b 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,4 +1,4 @@ -# Copyright 2021 ZBW – Leibniz Information Centre for Economics +# Copyright 2021-2022 ZBW – Leibniz Information Centre for Economics # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License.