diff --git a/cbrkit/case_sim/__init__.py b/cbrkit/case_sim/__init__.py index d9cecfb..deeecd5 100644 --- a/cbrkit/case_sim/__init__.py +++ b/cbrkit/case_sim/__init__.py @@ -1,5 +1,5 @@ from cbrkit.case_sim.generic import equality -from cbrkit.case_sim.helpers import aggregate, apply +from cbrkit.case_sim.helpers import aggregate, batchify from cbrkit.case_sim.tabular import factory as tabular -__all__ = ["apply", "aggregate", "tabular", "equality"] +__all__ = ["batchify", "aggregate", "tabular", "equality"] diff --git a/cbrkit/case_sim/generic.py b/cbrkit/case_sim/generic.py index f54c20b..115ee63 100644 --- a/cbrkit/case_sim/generic.py +++ b/cbrkit/case_sim/generic.py @@ -1,11 +1,11 @@ from typing import Any -from cbrkit.case_sim.helpers import apply +from cbrkit.case_sim.helpers import batchify from cbrkit.typing import CaseSimBatchFunc, SimilarityValue def equality() -> CaseSimBatchFunc[Any, Any]: - @apply + @batchify def wrapped_func(case: Any, query: Any) -> SimilarityValue: return case == query diff --git a/cbrkit/case_sim/helpers.py b/cbrkit/case_sim/helpers.py index 089e99f..70edd8c 100644 --- a/cbrkit/case_sim/helpers.py +++ b/cbrkit/case_sim/helpers.py @@ -15,7 +15,7 @@ ) -def apply( +def batchify( func: CaseSimFunc[CaseType], ) -> CaseSimBatchFunc[Any, CaseType]: def wrapped_func( diff --git a/cbrkit/data_sim/__init__.py b/cbrkit/data_sim/__init__.py index 7d43c67..f2fa981 100644 --- a/cbrkit/data_sim/__init__.py +++ b/cbrkit/data_sim/__init__.py @@ -1,9 +1,9 @@ from . import collections, generic, numeric, strings from .generic import equality, table -from .helpers import apply, dist2sim +from .helpers import batchify, dist2sim __all__ = [ - "apply", + "batchify", "dist2sim", "table", "equality", diff --git a/cbrkit/data_sim/collections.py b/cbrkit/data_sim/collections.py index 0b2288c..ca3ec8d 100644 --- a/cbrkit/data_sim/collections.py +++ b/cbrkit/data_sim/collections.py @@ -1,14 +1,14 @@ from collections.abc import Collection, Set from typing import Any -from cbrkit.data_sim.helpers import apply, dist2sim +from cbrkit.data_sim.helpers import batchify, dist2sim from cbrkit.typing import DataSimBatchFunc def jaccard() -> DataSimBatchFunc[Collection[Any]]: from nltk.metrics import jaccard_distance - @apply + @batchify def wrapped_func(x: Collection[Any], y: Collection[Any]) -> float: if not isinstance(x, Set): x = set(x) diff --git a/cbrkit/data_sim/generic.py b/cbrkit/data_sim/generic.py index 56036bf..3b493e4 100644 --- a/cbrkit/data_sim/generic.py +++ b/cbrkit/data_sim/generic.py @@ -1,7 +1,7 @@ from collections import defaultdict from typing import Any -from cbrkit.data_sim.helpers import apply +from cbrkit.data_sim.helpers import batchify from cbrkit.typing import ( DataSimBatchFunc, DataType, @@ -32,7 +32,7 @@ def wrapped_func(*args: tuple[DataType, DataType]) -> SimilaritySequence: def equality() -> DataSimBatchFunc[Any]: - @apply + @batchify def wrapped_func(x: Any, y: Any) -> SimilarityValue: return x == y diff --git a/cbrkit/data_sim/helpers.py b/cbrkit/data_sim/helpers.py index 0604deb..f0a3319 100644 --- a/cbrkit/data_sim/helpers.py +++ b/cbrkit/data_sim/helpers.py @@ -6,7 +6,7 @@ ) -def apply(func: DataSimFunc[DataType]) -> DataSimBatchFunc[DataType]: +def batchify(func: DataSimFunc[DataType]) -> DataSimBatchFunc[DataType]: def wrapped_func(*args: tuple[DataType, DataType]) -> SimilaritySequence: return [func(data1, data2) for (data1, data2) in args] diff --git a/cbrkit/data_sim/numeric.py b/cbrkit/data_sim/numeric.py index aae79ca..1c284d6 100644 --- a/cbrkit/data_sim/numeric.py +++ b/cbrkit/data_sim/numeric.py @@ -1,6 +1,6 @@ import math -from cbrkit.data_sim.helpers import apply +from cbrkit.data_sim.helpers import batchify from cbrkit.typing import DataSimBatchFunc, SimilarityValue Number = float | int @@ -18,7 +18,7 @@ def linear(max: float, min: float = 0.0) -> DataSimBatchFunc[Number]: ![linear](../../assets/numeric/linear.png) """ - @apply + @batchify def wrapped_func(x: Number, y: Number) -> SimilarityValue: return (max - abs(x - y)) / (max - min) @@ -34,7 +34,7 @@ def threshold(threshold: float) -> DataSimBatchFunc[Number]: ![threshold](../../assets/numeric/threshold.png) """ - @apply + @batchify def wrapped_func(x: Number, y: Number) -> SimilarityValue: return 1.0 if abs(x - y) <= threshold else 0.0 @@ -50,7 +50,7 @@ def exponential(alpha: float = 1.0) -> DataSimBatchFunc[Number]: ![exponential](../../assets/numeric/exponential.png) """ - @apply + @batchify def wrapped_func(x: Number, y: Number) -> SimilarityValue: return math.exp(-alpha * abs(x - y)) @@ -67,7 +67,7 @@ def sigmoid(alpha: float = 1.0, theta: float = 1.0) -> DataSimBatchFunc[Number]: ![sigmoid](../../assets/numeric/sigmoid.png) """ - @apply + @batchify def wrapped_func(x: Number, y: Number) -> SimilarityValue: return 1.0 / (1.0 + math.exp((abs(x - y) - theta) / alpha)) diff --git a/cbrkit/data_sim/strings.py b/cbrkit/data_sim/strings.py index a01428a..f61dd4f 100644 --- a/cbrkit/data_sim/strings.py +++ b/cbrkit/data_sim/strings.py @@ -1,7 +1,7 @@ import itertools from cbrkit.data_sim._taxonomy import Taxonomy, TaxonomyMeasure -from cbrkit.data_sim.helpers import apply +from cbrkit.data_sim.helpers import batchify from cbrkit.typing import ( DataSimBatchFunc, FilePath, @@ -31,7 +31,7 @@ def taxonomy( ) -> DataSimBatchFunc[str]: taxonomy = Taxonomy(path) - @apply + @batchify def wrapped_func(x: str, y: str) -> SimilarityValue: return taxonomy.similarity(x, y, measure) @@ -41,7 +41,7 @@ def wrapped_func(x: str, y: str) -> SimilarityValue: def levenshtein(score_cutoff: float | None = None) -> DataSimBatchFunc[str]: import Levenshtein - @apply + @batchify def wrapped_func(x: str, y: str) -> SimilarityValue: return Levenshtein.ratio(x, y, score_cutoff=score_cutoff) @@ -51,7 +51,7 @@ def wrapped_func(x: str, y: str) -> SimilarityValue: def jaro(score_cutoff: float | None = None) -> DataSimBatchFunc[str]: import Levenshtein - @apply + @batchify def wrapped_func(x: str, y: str) -> SimilarityValue: return Levenshtein.jaro(x, y, score_cutoff=score_cutoff) @@ -63,7 +63,7 @@ def jaro_winkler( ) -> DataSimBatchFunc[str]: import Levenshtein - @apply + @batchify def wrapped_func(x: str, y: str) -> SimilarityValue: return Levenshtein.jaro_winkler( x, y, score_cutoff=score_cutoff, prefix_weight=prefix_weight