Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add similarity measures for time-series data #80

Merged
merged 36 commits into from
Apr 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
0a2b6b9
add dtaidistance, minineedle dependency
Martin-Hoppe Mar 26, 2024
0c4df9a
remove string functions
Martin-Hoppe Mar 27, 2024
1eea598
remove unneccessary distance to similarity conversion
Martin-Hoppe Mar 27, 2024
3ab8e4e
add dtaidistance, minineedle dependency
Martin-Hoppe Mar 26, 2024
cea7a87
remove string functions
Martin-Hoppe Mar 27, 2024
2aad9c6
remove unneccessary distance to similarity conversion
Martin-Hoppe Mar 27, 2024
970f96b
Merge remote-tracking branch 'origin/changed' into changed
Martin-Hoppe Mar 27, 2024
789b4cd
new dependencies
Martin-Hoppe Mar 27, 2024
73bf0a5
ruff
Martin-Hoppe Mar 27, 2024
68a1f76
Merge pull request #1 from Martin-Hoppe/changed
Martin-Hoppe Mar 27, 2024
f1574f4
add empty line back
Martin-Hoppe Mar 27, 2024
b1ecaa7
removed spacy-double
Martin-Hoppe Mar 27, 2024
68ce2cc
remove comment about distance conversion
Martin-Hoppe Mar 27, 2024
c65050a
Merge pull request #2 from Martin-Hoppe/changed
Martin-Hoppe Mar 27, 2024
ee5f0d9
remove import
Martin-Hoppe Mar 27, 2024
c9196c9
Merge pull request #3 from Martin-Hoppe/changed
Martin-Hoppe Mar 27, 2024
8a9672f
Add boolean to allow to return distance instead of similarity
Martin-Hoppe Mar 27, 2024
f4c2f7a
add result to dtw
Martin-Hoppe Mar 28, 2024
5bce3b3
Merge pull request #4 from Martin-Hoppe/changed
Martin-Hoppe Mar 28, 2024
614412e
make it pass tests:
Martin-Hoppe Mar 28, 2024
6e817c5
Merge pull request #5 from Martin-Hoppe/changed
Martin-Hoppe Mar 28, 2024
fafc252
remake poetry file
Martin-Hoppe Mar 29, 2024
91538e1
Merge pull request #6 from Martin-Hoppe/changed
Martin-Hoppe Mar 29, 2024
d499d48
fix the failing tests (?)
Martin-Hoppe Mar 29, 2024
b78fb2c
Merge pull request #7 from Martin-Hoppe/changed
Martin-Hoppe Mar 29, 2024
46b7746
reformat
Martin-Hoppe Mar 29, 2024
2150ce2
Merge pull request #8 from Martin-Hoppe/changed
Martin-Hoppe Mar 29, 2024
69b4b6a
chore(deps): update actions/configure-pages action to v5 (#89)
renovate[bot] Mar 30, 2024
9ccb876
chore(deps): lock file maintenance (#90)
renovate[bot] Apr 1, 2024
a02d84f
update poetry lock
mirkolenz Apr 1, 2024
d71bc96
update flake
mirkolenz Apr 1, 2024
f71c854
show ci trace
mirkolenz Apr 1, 2024
bc278c7
update time series measures
mirkolenz Apr 1, 2024
a7d1537
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 1, 2024
ff16f83
update pre commit
mirkolenz Apr 1, 2024
71a3093
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 1, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
- uses: actions/checkout@v4
- uses: DeterminateSystems/nix-installer-action@v10
- uses: DeterminateSystems/magic-nix-cache-action@v4
- run: nix flake check
- run: nix flake check --show-trace
release:
runs-on: ubuntu-latest
needs: test
Expand Down Expand Up @@ -90,7 +90,7 @@ jobs:
- uses: actions/checkout@v4
with:
ref: ${{ needs.release.outputs.git-head }}
- uses: actions/configure-pages@v4
- uses: actions/configure-pages@v5
- uses: DeterminateSystems/nix-installer-action@v10
- uses: DeterminateSystems/magic-nix-cache-action@v4
- run: nix build .#docs
Expand Down
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ default_language_version:
python: python3.11
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.1.3
rev: v0.3.4
hooks:
- id: ruff
- id: ruff-format
- repo: https://github.com/python-poetry/poetry
rev: 1.6.1
rev: 1.8.2
hooks:
- id: poetry-check
1 change: 0 additions & 1 deletion cbrkit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

"""


from . import helpers, loaders, retrieval, sim, typing

__all__ = [
Expand Down
1 change: 0 additions & 1 deletion cbrkit/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
.. include:: ../cli.md
"""


from pathlib import Path

try:
Expand Down
67 changes: 65 additions & 2 deletions cbrkit/sim/collections.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from collections.abc import Collection, Set
from collections.abc import Collection, Sequence, Set
from typing import Any

from cbrkit.helpers import dist2sim
from cbrkit.typing import SimPairFunc

__all__ = ["jaccard"]
Number = float | int

__all__ = ["jaccard", "smith_waterman", "dtw"]


def jaccard() -> SimPairFunc[Collection[Any], float]:
Expand All @@ -26,3 +28,64 @@ def wrapped_func(x: Collection[Any], y: Collection[Any]) -> float:
return dist2sim(jaccard_distance(x, y))

return wrapped_func


def smith_waterman(
    match_score: int = 2, mismatch_penalty: int = -1, gap_penalty: int = -1
) -> SimPairFunc[Sequence[Any], float]:
    """Build a pairwise scorer based on Smith-Waterman local alignment.

    The returned function aligns two sequences using ``minineedle`` with the
    configured scoring parameters and returns the alignment score. If no
    element matches, it returns 0.0.

    Args:
        match_score: Score awarded for matching elements. Defaults to 2.
        mismatch_penalty: Penalty applied to mismatching elements. Defaults to -1.
        gap_penalty: Penalty applied to gaps. Defaults to -1.

    Returns:
        A function taking two sequences and returning their alignment score.

    Example:
        >>> sim = smith_waterman()
        >>> sim("abcde", "fghe")
        2
    """
    from minineedle import core, smith

    def wrapped_func(x: Sequence[Any], y: Sequence[Any]) -> float:
        try:
            aligner = smith.SmithWaterman(x, y)
            scoring = core.ScoreMatrix(
                match=match_score, miss=mismatch_penalty, gap=gap_penalty
            )
            aligner.change_matrix(scoring)
            aligner.align()
            score = aligner.get_score()
        except ZeroDivisionError:
            # A ZeroDivisionError anywhere in the alignment steps is reported
            # as "no match" (0.0) rather than propagated to the caller.
            # NOTE(review): presumably minineedle raises this when the
            # alignment is empty -- confirm against the library.
            score = 0.0

        return score

    return wrapped_func


def dtw() -> SimPairFunc[Collection[Number], float]:
    """Build a pairwise similarity function based on Dynamic Time Warping.

    The returned function computes the DTW distance between two numeric
    series with ``dtaidistance`` and converts that distance to a similarity
    via ``dist2sim``.

    Returns:
        A function taking two numeric collections (or NumPy arrays) and
        returning their similarity as a float.

    Examples:
        >>> sim = dtw()
        >>> sim([1, 2, 3], [1, 2, 3, 4])
        0.5
    """
    import numpy as np

    # Aliased so the library module does not shadow this factory's own name.
    from dtaidistance import dtw as dtw_lib

    def wrapped_func(
        x: Collection[Number] | np.ndarray, y: Collection[Number] | np.ndarray
    ) -> float:
        # Arrays are passed through untouched; any other collection is
        # converted to a NumPy array before computing the distance.
        x_arr = x if isinstance(x, np.ndarray) else np.array(x)
        y_arr = y if isinstance(y, np.ndarray) else np.array(y)

        distance = dtw_lib.distance(x_arr, y_arr)

        return dist2sim(distance)

    return wrapped_func
15 changes: 5 additions & 10 deletions cbrkit/typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,20 +32,17 @@ class FloatProtocol(Protocol):
class SimMapFunc(Protocol[KeyType, ValueType_contra, SimType_cov]):
def __call__(
self, x_map: Mapping[KeyType, ValueType_contra], y: ValueType_contra
) -> SimMap[KeyType, SimType_cov]:
...
) -> SimMap[KeyType, SimType_cov]: ...


class SimSeqFunc(Protocol[ValueType_contra, SimType_cov]):
def __call__(
self, pairs: Sequence[tuple[ValueType_contra, ValueType_contra]], /
) -> SimSeq[SimType_cov]:
...
) -> SimSeq[SimType_cov]: ...


class SimPairFunc(Protocol[ValueType_contra, SimType_cov]):
def __call__(self, x: ValueType_contra, y: ValueType_contra, /) -> SimType_cov:
...
def __call__(self, x: ValueType_contra, y: ValueType_contra, /) -> SimType_cov: ...


AnySimFunc = (
Expand All @@ -60,14 +57,12 @@ def __call__(
self,
similarities: SimSeqOrMap[KeyType, SimType_contra],
/,
) -> float:
...
) -> float: ...


class PoolingFunc(Protocol):
def __call__(
self,
similarities: SimSeq[float],
/,
) -> float:
...
) -> float: ...
6 changes: 3 additions & 3 deletions flake.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
extras = [
"cli"
"nlp"
"timeseries"
];
};
in
Expand Down
Loading