From 9ebc74ef32b259333c4d4a3d6e21c6ae023be1da Mon Sep 17 00:00:00 2001 From: Sebastian Schmidl Date: Sun, 16 Jul 2023 21:14:16 +0200 Subject: [PATCH] Further tooling: black, flake8, and precommit-hooks (#32) * feat: add black, flake8 and precommit-hooks * chore: run precommit-hooks on all files and fix all existing errors * feat: improve documentation regarding contributions and intended uses of GutenTAG * fix: pre-commit-dependency version specification * fix: downgrade flake8 to 5.0.4 to support Python 3.7 * chore: remove obsolete gitlab CI configuration * fix: downgrade pre-commit to 2.21 to support Python 3.7 * chore: remove additional generation configurations (they were accidentially added in a previous commit) --- .github/workflows/build.yml | 9 +- .github/workflows/test-pr.yml | 9 +- .gitlab-ci.yml | 72 -- .pre-commit-config.yaml | 26 + CONTRIBUTING.md | 36 + README.md | 7 +- doc/advanced-features.md | 2 +- doc/introduction/base-oscillations.md | 2 +- generation_configs/multivariate-test-cases.py | 637 +++++++++--------- gutenTAG/__main__.py | 67 +- gutenTAG/addons/__init__.py | 19 +- gutenTAG/addons/builtin.py | 2 +- gutenTAG/addons/timeeval.py | 110 ++- gutenTAG/anomalies/__init__.py | 70 +- gutenTAG/anomalies/types/__init__.py | 28 +- gutenTAG/anomalies/types/amplitude.py | 53 +- gutenTAG/anomalies/types/extremum.py | 18 +- gutenTAG/anomalies/types/frequency.py | 10 +- gutenTAG/anomalies/types/kind.py | 32 +- gutenTAG/anomalies/types/mean.py | 8 +- gutenTAG/anomalies/types/mode_correlation.py | 10 +- gutenTAG/anomalies/types/pattern.py | 78 ++- gutenTAG/anomalies/types/pattern_shift.py | 40 +- gutenTAG/anomalies/types/platform.py | 4 +- gutenTAG/anomalies/types/trend.py | 24 +- gutenTAG/anomalies/types/variance.py | 25 +- gutenTAG/api/__init__.py | 15 +- gutenTAG/base_oscillations/cosine.py | 39 +- gutenTAG/base_oscillations/custom_input.py | 4 +- .../base_oscillations/cylinder_bell_funnel.py | 81 ++- gutenTAG/base_oscillations/dirichlet.py | 51 +- gutenTAG/base_oscillations/ecg.py | 55 +- gutenTAG/base_oscillations/formula.py | 40 +- gutenTAG/base_oscillations/interface.py | 124 +++- gutenTAG/base_oscillations/mls.py | 49 +- gutenTAG/base_oscillations/polynomial.py | 25 +- .../base_oscillations/random_mode_jump.py | 65 +- gutenTAG/base_oscillations/random_walk.py | 41 +- gutenTAG/base_oscillations/sawtooth.py | 43 +- gutenTAG/base_oscillations/sine.py | 39 +- gutenTAG/base_oscillations/square.py | 43 +- .../utils/math_func_support.py | 7 +- gutenTAG/config/parser.py | 82 ++- gutenTAG/config/schema_loader.py | 2 +- gutenTAG/config/validator.py | 97 ++- gutenTAG/consolidator.py | 45 +- gutenTAG/generator/overview.py | 29 +- gutenTAG/generator/timeseries.py | 115 +++- gutenTAG/gutenTAG.py | 122 ++-- gutenTAG/utils/compatibility.py | 40 +- gutenTAG/utils/default_values.py | 6 +- gutenTAG/utils/global_variables.py | 2 +- gutenTAG/utils/logger.py | 4 +- gutenTAG/utils/types.py | 27 +- requirements.dev | 3 + setup.cfg | 24 +- setup.py | 40 +- tests/__init__.py | 2 +- tests/test_addons.py | 15 +- tests/test_base_oscillations/test_formula.py | 25 +- tests/test_generator/test_compatibility.py | 26 +- .../test_generator/test_overview_sanitizer.py | 26 +- tests/test_integration/__init__.py | 11 +- tests/test_integration/test_anomaly_types.py | 64 +- .../test_base_oscillations.py | 147 ++-- .../test_benchmark_generation.py | 2 +- tests/test_parallel.py | 12 +- tests/test_seeding.py | 32 +- 68 files changed, 1968 insertions(+), 1151 deletions(-) delete mode 100644 .gitlab-ci.yml create mode 
100644 .pre-commit-config.yaml create mode 100644 CONTRIBUTING.md diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a3bea9f..33721f1 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -12,9 +12,10 @@ defaults: shell: bash -l {0} jobs: - typecheck: - name: Typecheck GutenTAG on ubuntu with python 3.7 + check: + name: Run checks for GutenTAG on ubuntu with python 3.7 runs-on: ubuntu-latest + steps: - uses: actions/checkout@v3 - name: Setup Miniconda @@ -29,6 +30,10 @@ jobs: - name: Typcheck with mypy run: | python setup.py typecheck + - name: Lint with flake8 + run: | + flake8 . --count --show-source --statistics + test: name: Test GutenTAG on ${{ matrix.os }} with python ${{ matrix.python_version }} runs-on: ${{ matrix.os }} diff --git a/.github/workflows/test-pr.yml b/.github/workflows/test-pr.yml index 475585c..2fbb345 100644 --- a/.github/workflows/test-pr.yml +++ b/.github/workflows/test-pr.yml @@ -11,9 +11,10 @@ defaults: shell: bash -l {0} jobs: - typecheck: - name: Typecheck GutenTAG on ubuntu with python 3.7 + check: + name: Run checks for GutenTAG on ubuntu with python 3.7 runs-on: ubuntu-latest + steps: - uses: actions/checkout@v3 - name: Setup Miniconda @@ -28,6 +29,10 @@ jobs: - name: Typcheck with mypy run: | python setup.py typecheck + - name: Lint with flake8 + run: | + flake8 . --count --show-source --statistics + test: name: Test GutenTAG on ${{ matrix.os }} with python ${{ matrix.python_version }} runs-on: ${{ matrix.os }} diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml deleted file mode 100644 index e0149b0..0000000 --- a/.gitlab-ci.yml +++ /dev/null @@ -1,72 +0,0 @@ -# akita conda runner uses the following image by default: -# image: continuumio/miniconda3:4.9.2 - -stages: - - test - - build - - deploy - -workflow: - rules: - # If `$FORCE_GITLAB_CI` is set, create a pipeline. - - if: '$FORCE_GITLAB_CI' - # For merge requests, create a pipeline. - - if: '$CI_MERGE_REQUEST_IID' - # For `main` branch, create a pipeline (this includes on schedules, pushes, merges, etc.). - - if: '$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH' - # For tags, create a pipeline. 
- - if: '$CI_COMMIT_TAG' - # For branches starting with 'doc/' don't create a pipeline - - if: '$CI_COMMIT_BRANCH =~ /^doc\//' - when: never - -default: - tags: - - akita - - conda - before_script: - - export PIP_CACHE_DIR="/opt/cache/pip" - - echo "Setting up conda environment and installing dependencies" - - conda create -y -c conda-forge -n gutentag python=${PYTHON_VERSION:-3.7} - - source activate gutentag - - pip install -r requirements.dev - - python --version - -typecheck: - stage: test - interruptible: true - script: - - python setup.py typecheck - -test: - stage: test - interruptible: true - script: - - python setup.py test - artifacts: - reports: - coverage_report: - coverage_format: cobertura - path: coverage.xml - parallel: - matrix: - - PYTHON_VERSION: ["3.7", "3.8", "3.9", "3.10", "3.11"] - -build: - stage: build - only: - - tags - script: - - python setup.py sdist bdist_wheel - artifacts: - paths: - - dist/*.tar.gz - - dist/*.whl - -deploy: - stage: deploy - only: - - tags - script: - - TWINE_PASSWORD=${CI_JOB_TOKEN} TWINE_USERNAME=gitlab-ci-token python -m twine upload --repository-url https://gitlab.hpi.de/api/v4/projects/${CI_PROJECT_ID}/packages/pypi dist/* - - TWINE_PASSWORD="${PYPI_API_TOKEN}" TWINE_USERNAME="__token__" python -m twine upload --repository pypi dist/* diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..e9267ae --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,26 @@ +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.0.0 + hooks: + - id: end-of-file-fixer + - id: trailing-whitespace + args: [--markdown-linebreak-ext=md] + - id: check-case-conflict + - id: mixed-line-ending + args: [--fix=lf] +- repo: https://github.com/pre-commit/mirrors-mypy + rev: v0.920 + hooks: + - id: mypy + files: gutenTAG + additional_dependencies: + - types-PyYAML + - types-jsonschema +- repo: https://github.com/psf/black + rev: '23.1.0' + hooks: + - id: black +- repo: https://github.com/pycqa/flake8 + rev: '5.0.4' + hooks: + - id: flake8 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..ee0bbd3 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,36 @@ +# Contributing + +## Code style and format + +We use [black](https://black.readthedocs.io/) to automatically format our Python files. +Please stick to the black code style. + +Please consider using the pre-commit hooks. +They automatically run black (among other checks) for you. +See the next section. + +### Black quick-installation guide + +```bash +pip install black +black . +``` + +## Running pre-commit hooks + +We use [pre-commit](https://pre-commit.com/) to run some checks on your files before they are committed. +Find the configured hooks in [`.pre-commit-config.yaml`](./.pre-commit-config.yaml). +If there are errors, you have to re-add the fixed files to the index and commit again.
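+ +For example, if a hook such as black reformats a file during your commit (the path below is only an illustration; use whichever files the hooks actually changed), re-stage it and retry the commit: + +```bash +# re-add the file(s) fixed by the hooks, then commit again +git add gutenTAG/gutenTAG.py +git commit +```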
+ +### Pre-commit quick-installation guide + +```bash +pip install pre-commit +pre-commit install +``` + +Optionally, you can then run the hooks against all files with: + +```bash +pre-commit run --all-files +``` diff --git a/README.md b/README.md index 16bfb4b..bb027c5 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ [![CI](https://github.com/HPI-Information-Systems/gutentag/actions/workflows/build.yml/badge.svg)](https://github.com/HPI-Information-Systems/gutentag/actions/workflows/build.yml) [![codecov](https://codecov.io/gh/HPI-Information-Systems/gutentag/branch/main/graph/badge.svg?token=6QXOCY4TS2)](https://codecov.io/gh/HPI-Information-Systems/gutentag) +[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) [![PyPI package](https://badge.fury.io/py/timeeval-gutenTAG.svg)](https://badge.fury.io/py/timeeval-gutenTAG) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) ![python version 3.7|3.8|3.9|3.10|3.11](https://img.shields.io/badge/python-3.7%20%7C%203.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-blue) @@ -93,6 +94,8 @@ If you use GutenTAG in your project or research, please cite our demonstration p } ``` -## To-Do +## Contributing -- [ ] negation anomaly (does a pattern not appear) +We welcome contributions to GutenTAG. +If you have spotted an issue with GutenTAG or if you want to enhance it, please open an issue first. +See [Contributing](CONTRIBUTING.md) for details. diff --git a/doc/advanced-features.md b/doc/advanced-features.md index 61112cc..bf81894 100644 --- a/doc/advanced-features.md +++ b/doc/advanced-features.md @@ -2,7 +2,7 @@ ## Add-Ons -GutenTAG has a simple add-on feature which can be activated by using the [CLI](usage#from-cli). +GutenTAG has a simple add-on feature which can be activated by using the [CLI](usage#from-cli). ### Definition diff --git a/doc/introduction/base-oscillations.md b/doc/introduction/base-oscillations.md index 1bd07a5..8033fbf 100644 --- a/doc/introduction/base-oscillations.md +++ b/doc/introduction/base-oscillations.md @@ -239,7 +239,7 @@ operand: Union[float, FormulaObj] **Aggregation** The alternative to the `Operation` is the `Aggregation` field. This field defines an aggregation operation on the `base`. Its result is either a scalar or an array. It consists of two fields: `kind`, and `axis`. -The `kind` field defines the kind of aggregation performed on a numpy array. The optional `axis` field defines the axis of the array the aggregation is performed on. If no `axis` is defined, the aggregation will return a scalar. +The `kind` field defines the kind of aggregation performed on a numpy array. The optional `axis` field defines the axis of the array the aggregation is performed on. If no `axis` is defined, the aggregation will return a scalar.
```yaml # Aggregation diff --git a/generation_configs/multivariate-test-cases.py b/generation_configs/multivariate-test-cases.py index 0668ed4..d6e1864 100644 --- a/generation_configs/multivariate-test-cases.py +++ b/generation_configs/multivariate-test-cases.py @@ -22,7 +22,7 @@ def random_bo(kind: str): "frequency": np.random.randint(1, 50), "variance": noise, "offset": np.random.rand() * 20 - 10, - "amplitude": np.random.rand() * 10 + "amplitude": np.random.rand() * 10, }, "complex-sine": { "kind": "sine", @@ -31,16 +31,13 @@ def random_bo(kind: str): "offset": np.random.rand() * 20 - 10, "amplitude": np.random.rand() * 10, "freq-mod": 0.01, - "trend": { - "kind": "polynomial", - "polynomial": [1, 1] - } + "trend": {"kind": "polynomial", "polynomial": [1, 1]}, }, "ecg": { "kind": "ecg", "frequency": np.random.randint(10, 50), "variance": 0.001, - "offset": np.random.rand() * 20 - 10 + "offset": np.random.rand() * 20 - 10, }, "cbf": { "kind": "cylinder-bell-funnel", @@ -49,14 +46,14 @@ def random_bo(kind: str): "amplitude": np.random.rand() * 10 + 0.1, "avg-pattern-length": np.random.randint(10, 200), "variance-pattern-length": 0.2, - "variance-amplitude": 0.5 + "variance-amplitude": 0.5, }, "rw": { "kind": "random-walk", "variance": noise, "offset": np.random.rand() * 20 - 10, "amplitude": 1, - "smoothing": 0.01 + "smoothing": 0.01, }, "poly": { "kind": "polynomial", @@ -71,78 +68,48 @@ def random_bo(kind: str): "frequency": np.random.randint(10, 1000), "channel_diff": np.random.rand() * 10, "channel_offset": 1, - } + }, } return mapping[kind] def random_anomaly(kind: str): trend_bos = [ - { - "kind": "polynomial", - "polynomial": [2, 2] - }, - { - "kind": "polynomial", - "polynomial": [0.2, 0.2] - }, - { - "kind": "sine", - "frequency": 0.725, - "amplitude": 0.25, - "variance": 0.0 - }, - { - "kind": "sine", - "frequency": 1, - "amplitude": .5, - "variance": 0.0 - } + {"kind": "polynomial", "polynomial": [2, 2]}, + {"kind": "polynomial", "polynomial": [0.2, 0.2]}, + {"kind": "sine", "frequency": 0.725, "amplitude": 0.25, "variance": 0.0}, + {"kind": "sine", "frequency": 1, "amplitude": 0.5, "variance": 0.0}, ] mapping = { "amplitude": { "kind": "amplitude", - "amplitude_factor": np.random.choice([0.1, 0.25, 0.5, 2, 3, 10]) + "amplitude_factor": np.random.choice([0.1, 0.25, 0.5, 2, 3, 10]), }, "extremum": { "kind": "extremum", "min": False, "local": bool(np.random.choice([True, False])), - "context_window": 100 + "context_window": 100, }, "frequency": { "kind": "frequency", - "frequency_factor": np.random.choice([0.1, 0.25, 0.3, 0.5, 2, 3, 10]) - }, - "mean": { - "kind": "mean", - "offset": np.random.rand() * 20 - 10 + "frequency_factor": np.random.choice([0.1, 0.25, 0.3, 0.5, 2, 3, 10]), }, + "mean": {"kind": "mean", "offset": np.random.rand() * 20 - 10}, "pattern": { "kind": "pattern", "sinusoid_k": np.random.rand(), - "cbf_pattern_factor": np.random.rand() + "cbf_pattern_factor": np.random.rand(), }, "pattern-shift": { "kind": "pattern-shift", "shift_by": np.random.randint(-50, 50), - "transition_window": 50 - }, - "platform": { - "kind": "platform", - "value": np.random.randint(10) - }, - "trend": { - "kind": "trend", - "oscillation": np.random.choice(trend_bos) - }, - "variance": { - "kind": "variance", - "variance": np.random.rand() / 2 + "transition_window": 50, }, - "mode-correlation": { - "kind": "mode-correlation" - } + "platform": {"kind": "platform", "value": np.random.randint(10)}, + "trend": {"kind": "trend", "oscillation": np.random.choice(trend_bos)}, + "variance": 
{"kind": "variance", "variance": np.random.rand() / 2}, + "mode-correlation": {"kind": "mode-correlation"}, } return mapping[kind] @@ -154,7 +121,7 @@ def create_ts_def(name: str, bo_defs, anom_defs): "semi-supervised": GENERATE_SEMI_SUPERVISED, "supervised": GENERATE_SUPERVISED, "base-oscillations": bo_defs, - "anomalies": anom_defs + "anomalies": anom_defs, } @@ -163,7 +130,7 @@ def gen_channel_series(): anomalies = { "ecg": ["frequency", "variance", "extremum"], "rw": ["platform", "variance", "extremum"], - "poly": ["platform", "variance", "extremum"] + "poly": ["platform", "variance", "extremum"], } anomaly_lengths = [100, 200, 500, 1000] n_channels = [2, 5, 10, 50, 100, 500] @@ -173,19 +140,23 @@ def gen_channel_series(): for d in n_channels: bo_defs = [random_bo(bo) for _ in range(d)] for anom in anomalies[bo]: - for l in anomaly_lengths: + for length in anomaly_lengths: if anom == "extremum": - l = 1 - timeseries.append(create_ts_def( - f"channels-{d}-{bo}-{anom}-{l}", - bo_defs, - [{ - "position": "middle", - "length": l, - "channel": np.random.randint(d), - "kinds": [random_anomaly(anom)] - }] - )) + length = 1 + timeseries.append( + create_ts_def( + f"channels-{d}-{bo}-{anom}-{length}", + bo_defs, + [ + { + "position": "middle", + "length": length, + "channel": np.random.randint(d), + "kinds": [random_anomaly(anom)], + } + ], + ) + ) if anom == "extremum": break @@ -198,16 +169,20 @@ def gen_bo_diversity_series(): # all same n_bos = 10 for bo in bos: - timeseries.append(create_ts_def( - f"bo-diversity-all-same-{bo}", - [random_bo(bo) for _ in range(n_bos)], - [{ - "position": "middle", - "length": 100, - "channel": np.random.randint(n_bos), - "kinds": [random_anomaly("mean")] - }] - )) + timeseries.append( + create_ts_def( + f"bo-diversity-all-same-{bo}", + [random_bo(bo) for _ in range(n_bos)], + [ + { + "position": "middle", + "length": 100, + "channel": np.random.randint(n_bos), + "kinds": [random_anomaly("mean")], + } + ], + ) + ) # same diff base_bo = "sine" n_bos = 10 @@ -216,24 +191,23 @@ def gen_bo_diversity_series(): bo_defs = np.array([random_bo(base_bo) for _ in range(n_bos)]) idxs = np.random.choice(n_bos, size=i, replace=False) bo_defs[idxs] = [random_bo(bo) for _ in range(i)] - timeseries.append(create_ts_def( - f"bo-diversity-{i}-same-diff-{bo}", - bo_defs.tolist(), - [{ - "position": "middle", - "length": 100, - "channel": np.random.randint(n_bos), - "kinds": [random_anomaly("mean")] - }] - )) + timeseries.append( + create_ts_def( + f"bo-diversity-{i}-same-diff-{bo}", + bo_defs.tolist(), + [ + { + "position": "middle", + "length": 100, + "channel": np.random.randint(n_bos), + "kinds": [random_anomaly("mean")], + } + ], + ) + ) # multiple diff n_bos = 10 - times_mapping = { - 2: [5, 5], - 3: [3, 3, 4], - 4: [2, 2, 3, 3], - 5: [2, 2, 2, 2, 2] - } + times_mapping = {2: [5, 5], 3: [3, 3, 4], 4: [2, 2, 3, 3], 5: [2, 2, 2, 2, 2]} for i in [2, 3, 4, 5]: bo_combs = list(combinations(bos, i)) for bo_tuple in bo_combs: @@ -246,23 +220,36 @@ def gen_bo_diversity_series(): pos_begin = np.sum(times_mapping[i][:anom_pos], dtype=np.int_) pos_end = pos_begin + times_mapping[i][anom_pos] pos = np.random.randint(pos_begin, pos_end) - timeseries.append(create_ts_def( - f"bo-diversity-{i}-multi-diff-BOS={'_'.join(sorted(bo_tuple))}-ANOM={sorted(bo_tuple).index(bo_tuple[anom_pos])}", - bo_defs, - [{ - "position": "middle", - "length": 100, - "channel": pos, - "kinds": [random_anomaly("mean")] - }] - )) + timeseries.append( + create_ts_def( + 
f"bo-diversity-{i}-multi-diff-BOS={'_'.join(sorted(bo_tuple))}-ANOM={sorted(bo_tuple).index(bo_tuple[anom_pos])}", + bo_defs, + [ + { + "position": "middle", + "length": 100, + "channel": pos, + "kinds": [random_anomaly("mean")], + } + ], + ) + ) return timeseries def gen_anomaly_appearance_series(): timeseries = [] - anomalies = ["amplitude", "extremum", "frequency", "mean", "pattern", - "pattern-shift", "platform", "trend", "variance"] + anomalies = [ + "amplitude", + "extremum", + "frequency", + "mean", + "pattern", + "pattern-shift", + "platform", + "trend", + "variance", + ] n_bos = 20 bo_kind = "sine" # same @@ -270,16 +257,23 @@ def gen_anomaly_appearance_series(): for n in [1, 2, 5, 10, n_bos]: anom_channels = np.random.choice(n_bos, size=n, replace=False) position = np.random.randint(2500, 7500) - timeseries.append(create_ts_def( - f"anom-appearance-same-{n}-{anom}", - [random_bo(bo_kind) for _ in range(n_bos)], - [{ - "exact-position": position + 50 if anom == "extremum" else position, - "length": 1 if anom == "extremum" else 100, - "channel": int(c), - "kinds": [random_anomaly(anom)] - } for c in anom_channels] - )) + timeseries.append( + create_ts_def( + f"anom-appearance-same-{n}-{anom}", + [random_bo(bo_kind) for _ in range(n_bos)], + [ + { + "exact-position": position + 50 + if anom == "extremum" + else position, + "length": 1 if anom == "extremum" else 100, + "channel": int(c), + "kinds": [random_anomaly(anom)], + } + for c in anom_channels + ], + ) + ) # diff for n_kinds in [2, 3, 5]: anom_combs = list(combinations(anomalies, n_kinds)) @@ -301,222 +295,233 @@ def gen_anomaly_appearance_series(): anomaly_defs = [] position = np.random.randint(2500, 7500) for anom, anom_times in zip(anom_tuple, times): - anom_channels = np.random.choice(n_bos, size=anom_times, replace=False) + anom_channels = np.random.choice( + n_bos, size=anom_times, replace=False + ) for c in anom_channels: - anomaly_defs.append({ - "exact-position": position + 50 if anom == "extremum" else position, - "length": 1 if anom == "extremum" else 100, - "channel": int(c), - "kinds": [random_anomaly(anom)] - }) + anomaly_defs.append( + { + "exact-position": position + 50 + if anom == "extremum" + else position, + "length": 1 if anom == "extremum" else 100, + "channel": int(c), + "kinds": [random_anomaly(anom)], + } + ) - timeseries.append(create_ts_def( - f"anom-appearance-{n_kinds}-diff-{n}-ANOMS={'_'.join(sorted(anom_tuple))}", - [random_bo(bo_kind) for _ in range(n_bos)], - anomaly_defs - )) + timeseries.append( + create_ts_def( + f"anom-appearance-{n_kinds}-diff-{n}-ANOMS={'_'.join(sorted(anom_tuple))}", + [random_bo(bo_kind) for _ in range(n_bos)], + anomaly_defs, + ) + ) return timeseries def gen_special_series(): - return [{ - "name": "sum-cancels-out-anomaly", - "length": 10000, - "semi-supervised": GENERATE_SEMI_SUPERVISED, - "supervised": GENERATE_SUPERVISED, - "base-oscillations": [{ - "kind": "sine", - "frequency": 0.5, - "offset": 2 - }, { - "kind": "formula", - "formula": { - "base": 0, - "operation": { - "kind": "*", - "operand": -1. 
- } - } - }], - "anomalies": [{ - "exact-position": 5000, - "length": 100, - "channel": 0, - "kinds": [{"kind": "mean", "offset": 2}] - }, { - "exact-position": 5000, - "length": 100, - "channel": 1, - "kinds": [{"kind": "mean", "offset": -2}] - }] - }, { - "name": "3-is-sum-of-2", - "length": 10000, - "semi-supervised": GENERATE_SEMI_SUPERVISED, - "supervised": GENERATE_SUPERVISED, - "base-oscillations": [{ - "kind": "sine", - "frequency": 0.5, - "offset": 2 - }, { - "kind": "polynomial", - "polynomial": [0, -2], - }, { - "kind": "formula", - "formula": { - "base": 0, - "operation": { - "kind": "+", - "operand": { - "base": 1 - } + return [ + { + "name": "sum-cancels-out-anomaly", + "length": 10000, + "semi-supervised": GENERATE_SEMI_SUPERVISED, + "supervised": GENERATE_SUPERVISED, + "base-oscillations": [ + {"kind": "sine", "frequency": 0.5, "offset": 2}, + { + "kind": "formula", + "formula": {"base": 0, "operation": {"kind": "*", "operand": -1.0}}, + }, + ], + "anomalies": [ + { + "exact-position": 5000, + "length": 100, + "channel": 0, + "kinds": [{"kind": "mean", "offset": 2}], + }, + { + "exact-position": 5000, + "length": 100, + "channel": 1, + "kinds": [{"kind": "mean", "offset": -2}], + }, + ], + }, + { + "name": "3-is-sum-of-2", + "length": 10000, + "semi-supervised": GENERATE_SEMI_SUPERVISED, + "supervised": GENERATE_SUPERVISED, + "base-oscillations": [ + {"kind": "sine", "frequency": 0.5, "offset": 2}, + { + "kind": "polynomial", + "polynomial": [0, -2], + }, + { + "kind": "formula", + "formula": { + "base": 0, + "operation": {"kind": "+", "operand": {"base": 1}}, + }, + }, + ], + "anomalies": [ + { + "exact-position": 5000, + "length": 100, + "channel": 0, + "kinds": [{"kind": "mean", "offset": 2}], + }, + { + "exact-position": 5000, + "length": 100, + "channel": 1, + "kinds": [{"kind": "mean", "offset": -2}], + }, + { + "position": "end", + "length": 1, + "channel": 2, + "kinds": [ + { + "kind": "extremum", + "local": True, + "min": False, + "context_window": 200, + } + ], + }, + ], + }, + { + "name": "creeping-anomalies", + "length": 10000, + "semi-supervised": GENERATE_SEMI_SUPERVISED, + "supervised": GENERATE_SUPERVISED, + "base-oscillations": [ + {"kind": "sine", "frequency": 0.5, "offset": 2}, + { + "kind": "polynomial", + "polynomial": [0, -2], + }, + {"kind": "ecg", "frequency": 2, "variance": 0.005}, + ], + "anomalies": [ + { + "position": "middle", + "length": 300, + "creeping-length": 200, + "channel": 1, + "kinds": [{"kind": "mean", "offset": 2}], + }, + { + "position": "middle", + "length": 300, + "creeping-length": 250, + "channel": 0, + "kinds": [{"kind": "variance", "variance": 0.5}], + }, + { + "position": "end", + "length": 200, + "creeping-length": 100, + "channel": 2, + "kinds": [{"kind": "amplitude", "amplitude_factor": 2}], + }, + ], + }, + { + "name": "shifted-anomalies", + "length": 10000, + "semi-supervised": GENERATE_SEMI_SUPERVISED, + "supervised": GENERATE_SUPERVISED, + "base-oscillations": [ + {"kind": "sine", "frequency": 0.5, "offset": 2}, + { + "kind": "polynomial", + "polynomial": [0, -2], + }, + {"kind": "ecg", "frequency": 2, "variance": 0.005}, + ], + "anomalies": [ + { + "exact-position": 5200, + "length": 200, + "channel": 0, + "kinds": [{"kind": "frequency", "frequency_factor": 2}], + }, + { + "exact-position": 5300, + "length": 200, + "channel": 2, + "kinds": [{"kind": "frequency", "frequency_factor": 0.5}], + }, + ], + }, + { + "name": "missing-pattern-1", + "length": 10000, + "semi-supervised": GENERATE_SEMI_SUPERVISED, + "supervised": 
GENERATE_SUPERVISED, + "base-oscillations": [ + {"kind": "ecg", "frequency": 1, "offset": 0, "variance": 0.001} + ], + "anomalies": [ + { + "exact-position": 1715, + "length": 8, + "channel": 0, + "kinds": [ + {"kind": "platform", "value": 0.5}, + {"kind": "variance", "variance": 0.05}, + ], } - } - }], - "anomalies": [{ - "exact-position": 5000, - "length": 100, - "channel": 0, - "kinds": [{"kind": "mean", "offset": 2}] - }, { - "exact-position": 5000, - "length": 100, - "channel": 1, - "kinds": [{"kind": "mean", "offset": -2}] - }, { - "position": "end", - "length": 1, - "channel": 2, - "kinds": [{"kind": "extremum", "local": True, "min": False, "context_window": 200}] - }] - }, { - "name": "creeping-anomalies", - "length": 10000, - "semi-supervised": GENERATE_SEMI_SUPERVISED, - "supervised": GENERATE_SUPERVISED, - "base-oscillations": [{ - "kind": "sine", - "frequency": 0.5, - "offset": 2 - }, { - "kind": "polynomial", - "polynomial": [0, -2], - }, { - "kind": "ecg", - "frequency": 2, - "variance": 0.005 - }], - "anomalies": [{ - "position": "middle", - "length": 300, - "creeping-length": 200, - "channel": 1, - "kinds": [{"kind": "mean", "offset": 2}] - }, { - "position": "middle", - "length": 300, - "creeping-length": 250, - "channel": 0, - "kinds": [{"kind": "variance", "variance": .5}] - }, { - "position": "end", - "length": 200, - "creeping-length": 100, - "channel": 2, - "kinds": [{"kind": "amplitude", "amplitude_factor": 2}] - }] - }, { - "name": "shifted-anomalies", - "length": 10000, - "semi-supervised": GENERATE_SEMI_SUPERVISED, - "supervised": GENERATE_SUPERVISED, - "base-oscillations": [{ - "kind": "sine", - "frequency": 0.5, - "offset": 2 - }, { - "kind": "polynomial", - "polynomial": [0, -2], - }, { - "kind": "ecg", - "frequency": 2, - "variance": 0.005 - }], - "anomalies": [{ - "exact-position": 5200, - "length": 200, - "channel": 0, - "kinds": [{"kind": "frequency", "frequency_factor": 2}] - }, { - "exact-position": 5300, - "length": 200, - "channel": 2, - "kinds": [{"kind": "frequency", "frequency_factor": .5}] - }] - }, { - "name": "missing-pattern-1", - "length": 10000, - "semi-supervised": GENERATE_SEMI_SUPERVISED, - "supervised": GENERATE_SUPERVISED, - "base-oscillations": [{ - "kind": "ecg", - "frequency": 1, - "offset": 0, - "variance": 0.001 - }], - "anomalies": [{ - "exact-position": 1715, - "length": 8, - "channel": 0, - "kinds": [ - {"kind": "platform", "value": 0.5}, - {"kind": "variance", "variance": 0.05}, - ] - }] - }, { - "name": "missing-pattern-2", - "length": 10000, - "semi-supervised": GENERATE_SEMI_SUPERVISED, - "supervised": GENERATE_SUPERVISED, - "base-oscillations": [{ - "kind": "sine", - "frequency": .25, - "offset": 0, - "variance": 0 - }, { - "kind": "ecg", - "frequency": 1, - "offset": 0, - "variance": 0 - }, { - "kind": "formula", - "formula": { - "base": 0, - "operation": { - "kind": "*", - "operand": { - "base": 1 - } + ], + }, + { + "name": "missing-pattern-2", + "length": 10000, + "semi-supervised": GENERATE_SEMI_SUPERVISED, + "supervised": GENERATE_SUPERVISED, + "base-oscillations": [ + {"kind": "sine", "frequency": 0.25, "offset": 0, "variance": 0}, + {"kind": "ecg", "frequency": 1, "offset": 0, "variance": 0}, + { + "kind": "formula", + "formula": { + "base": 0, + "operation": {"kind": "*", "operand": {"base": 1}}, + }, + }, + ], + "anomalies": [ + { + "exact-position": 4316, + "length": 7, + "channel": 2, + "kinds": [{"kind": "platform", "value": 0.3}], } - } - }], - "anomalies": [{ - "exact-position": 4316, - "length": 7, - "channel": 
2, - "kinds": [{"kind": "platform", "value": 0.3}] - }] - }] + ], + }, + ] -if __name__ == '__main__': +if __name__ == "__main__": path = Path("multivariate-test-cases") print("Generating config ...") config = { - "timeseries": gen_channel_series() + gen_bo_diversity_series() + gen_anomaly_appearance_series() + gen_special_series() + "timeseries": gen_channel_series() + + gen_bo_diversity_series() + + gen_anomaly_appearance_series() + + gen_special_series() } - gutentag = GutenTAG(n_jobs=-1, seed=SEED, addons=["gutenTAG.addons.timeeval.TimeEvalAddOn"]) + gutentag = GutenTAG( + n_jobs=-1, seed=SEED, addons=["gutenTAG.addons.timeeval.TimeEvalAddOn"] + ) gutentag.load_config_dict(config) # gutentag.load_config_dict({"timeseries": gen_special_series()}, only="sum-cancels-out-anomaly") gutentag.generate(output_folder=path) diff --git a/gutenTAG/__main__.py b/gutenTAG/__main__.py index c707c49..7be924f 100644 --- a/gutenTAG/__main__.py +++ b/gutenTAG/__main__.py @@ -8,26 +8,54 @@ def parse_args(args: List[str]) -> argparse.Namespace: - parser = argparse.ArgumentParser(description="GutenTAG! A good Time series Anomaly Generator.") - - parser.add_argument("--version", action="store_true", help="Display GutenTAG version and exit.") - parser.add_argument("--config-yaml", type=Path, required=True, help="Path to config YAML") - parser.add_argument("--output-dir", type=Path, default=Path("./generated-timeseries"), - help="Path to output directory") - parser.add_argument("--plot", action="store_true", help="Plot every generated time series.") - parser.add_argument("--no-save", action="store_true", - help="Prevent GutenTAG from saving the generated time series.") + parser = argparse.ArgumentParser( + description="GutenTAG! A good Time series Anomaly Generator." + ) + + parser.add_argument( + "--version", action="store_true", help="Display GutenTAG version and exit." + ) + parser.add_argument( + "--config-yaml", type=Path, required=True, help="Path to config YAML" + ) + parser.add_argument( + "--output-dir", + type=Path, + default=Path("./generated-timeseries"), + help="Path to output directory", + ) + parser.add_argument( + "--plot", action="store_true", help="Plot every generated time series." + ) + parser.add_argument( + "--no-save", + action="store_true", + help="Prevent GutenTAG from saving the generated time series.", + ) parser.add_argument("--seed", type=int, default=None, help="Random seed") - parser.add_argument("--addons", nargs="*", default=[], help="Use Add-Ons for generating time series.") - parser.add_argument("--n_jobs", "--n-jobs", type=int, default=1, - help="Number of time series to generate in parallel.") - parser.add_argument("--only", type=str, help="Process only timeseries with the defined name.") + parser.add_argument( + "--addons", + nargs="*", + default=[], + help="Use Add-Ons for generating time series.", + ) + parser.add_argument( + "--n_jobs", + "--n-jobs", + type=int, + default=1, + help="Number of time series to generate in parallel.", + ) + parser.add_argument( + "--only", type=str, help="Process only timeseries with the defined name." + ) return parser.parse_args(args) def main(sys_args: List[str]) -> None: - print(f""" + print( + f""" Welcome to @@ -43,7 +71,8 @@ def main(sys_args: List[str]) -> None: "Good day!" wishes your friendly Timeseries Anomaly Generator. 
-""") +""" + ) if "--version" in sys_args: return @@ -59,14 +88,10 @@ def main(sys_args: List[str]) -> None: n_jobs=args.n_jobs, seed=args.seed, addons=args.addons, - only=args.only + only=args.only, ) - gutentag.generate( - return_timeseries=False, - output_folder=output, - plot=args.plot - ) + gutentag.generate(return_timeseries=False, output_folder=output, plot=args.plot) def cli() -> None: diff --git a/gutenTAG/addons/__init__.py b/gutenTAG/addons/__init__.py index 4f19926..4788d03 100644 --- a/gutenTAG/addons/__init__.py +++ b/gutenTAG/addons/__init__.py @@ -21,7 +21,9 @@ class AddOnProcessContext: def should_save(self) -> bool: return self.output_folder is not None - def copy(self, timeseries: Optional[TimeSeries] = None, config: Optional[Dict] = None) -> AddOnProcessContext: + def copy( + self, timeseries: Optional[TimeSeries] = None, config: Optional[Dict] = None + ) -> AddOnProcessContext: return AddOnProcessContext( timeseries=timeseries or self.timeseries, config=config or self.config, @@ -70,13 +72,18 @@ def finalize(self, ctx: AddOnFinalizeContext) -> None: def import_addons(addons: List[str]) -> List[Type[BaseAddOn]]: builtin_module = "gutenTAG.addons.builtin" - module_classes = [(addon.rsplit(".", 1) if "." in addon else (builtin_module, addon)) for addon in addons] + module_classes = [ + (addon.rsplit(".", 1) if "." in addon else (builtin_module, addon)) + for addon in addons + ] def load_addon(package, cls): try: module = importlib.import_module(package) except ImportError as ex: - raise ValueError(f"Package '{package}' for AddOn {cls} could not be loaded!") from ex + raise ValueError( + f"Package '{package}' for AddOn {cls} could not be loaded!" + ) from ex try: addon_cls = module.__dict__[cls] @@ -84,8 +91,10 @@ def load_addon(package, cls): raise ValueError(f"AddOn {cls} not found in package '{package}'!") if not issubclass(addon_cls, BaseAddOn): - raise ValueError(f"Trying to load addon {package}.{cls}, but it is not a compatible AddOn! GutenTAG " - "AddOns must inherit from gutenTAG.addons.BaseAddOn!") + raise ValueError( + f"Trying to load addon {package}.{cls}, but it is not a compatible AddOn! GutenTAG " + "AddOns must inherit from gutenTAG.addons.BaseAddOn!" 
+ ) return addon_cls diff --git a/gutenTAG/addons/builtin.py b/gutenTAG/addons/builtin.py index 73a8d2c..1b1baa7 100644 --- a/gutenTAG/addons/builtin.py +++ b/gutenTAG/addons/builtin.py @@ -1 +1 @@ -from .timeeval import TimeEvalAddOn +from .timeeval import TimeEvalAddOn # noqa: F401 diff --git a/gutenTAG/addons/timeeval.py b/gutenTAG/addons/timeeval.py index f437467..0039331 100644 --- a/gutenTAG/addons/timeeval.py +++ b/gutenTAG/addons/timeeval.py @@ -9,8 +9,16 @@ from gutenTAG.base_oscillations.utils.math_func_support import calc_period_length from gutenTAG.generator import TimeSeries from gutenTAG.utils.default_values import default_values -from gutenTAG.utils.global_variables import SUPERVISED_FILENAME, UNSUPERVISED_FILENAME, SEMI_SUPERVISED_FILENAME, \ - BASE_OSCILLATIONS, ANOMALIES, PARAMETERS, BASE_OSCILLATION, BASE_OSCILLATION_NAMES +from gutenTAG.utils.global_variables import ( + SUPERVISED_FILENAME, + UNSUPERVISED_FILENAME, + SEMI_SUPERVISED_FILENAME, + BASE_OSCILLATIONS, + ANOMALIES, + PARAMETERS, + BASE_OSCILLATION, + BASE_OSCILLATION_NAMES, +) columns = [ @@ -35,7 +43,7 @@ "stddev", "trend", "stationarity", - "period_size" + "period_size", ] @@ -60,17 +68,16 @@ def __init__(self): def process(self, ctx: AddOnProcessContext) -> AddOnProcessContext: ts = ctx.timeseries config = ctx.config - datasets = [ - self._process_timeseries(config, ts, LearningType.Unsupervised) - ] + datasets = [self._process_timeseries(config, ts, LearningType.Unsupervised)] if ts.supervised: - datasets.append(self._process_timeseries(config, ts, LearningType.Supervised)) + datasets.append( + self._process_timeseries(config, ts, LearningType.Supervised) + ) if ts.semi_supervised: - datasets.append(self._process_timeseries(config, ts, LearningType.SemiSupervised)) - return ctx.store_data(self.key, { - "name": ts.dataset_name, - "datasets": datasets - }) + datasets.append( + self._process_timeseries(config, ts, LearningType.SemiSupervised) + ) + return ctx.store_data(self.key, {"name": ts.dataset_name, "datasets": datasets}) def finalize(self, ctx: AddOnFinalizeContext) -> None: # add metadata @@ -84,7 +91,9 @@ def finalize(self, ctx: AddOnFinalizeContext) -> None: filename = os.path.join(ctx.output_folder, "datasets.csv") df.to_csv(filename, index=False) - def _process_timeseries(self, config: Dict, generator: TimeSeries, tpe: LearningType) -> Dict[str, Any]: + def _process_timeseries( + self, config: Dict, generator: TimeSeries, tpe: LearningType + ) -> Dict[str, Any]: dataset: Dict[str, Any] = dict() dataset_name = generator.dataset_name @@ -100,17 +109,32 @@ def _process_timeseries(self, config: Dict, generator: TimeSeries, tpe: Learning dataset["input_type"] = "univariate" if ts.shape[1] == 1 else "multivariate" dataset["length"] = config.get(PARAMETERS.LENGTH, 10000) dataset["dimensions"] = ts.shape[1] - dataset["contamination"] = self._calc_contamination(config.get(ANOMALIES, []), dataset[PARAMETERS.LENGTH]) + dataset["contamination"] = self._calc_contamination( + config.get(ANOMALIES, []), dataset[PARAMETERS.LENGTH] + ) dataset["num_anomalies"] = len(config.get(ANOMALIES, [])) - dataset["min_anomaly_length"] = min([anomaly.get("length") for anomaly in config.get(ANOMALIES, [])]) - dataset["median_anomaly_length"] = np.median([anomaly.get(PARAMETERS.LENGTH) for anomaly in config.get(ANOMALIES, [])]) - dataset["max_anomaly_length"] = max([anomaly.get(PARAMETERS.LENGTH) for anomaly in config.get(ANOMALIES, [])]) + dataset["min_anomaly_length"] = min( + [anomaly.get("length") for anomaly in 
config.get(ANOMALIES, [])] + ) + dataset["median_anomaly_length"] = np.median( + [anomaly.get(PARAMETERS.LENGTH) for anomaly in config.get(ANOMALIES, [])] + ) + dataset["max_anomaly_length"] = max( + [anomaly.get(PARAMETERS.LENGTH) for anomaly in config.get(ANOMALIES, [])] + ) dataset["train_type"] = tpe.value dataset["train_is_normal"] = False if tpe == LearningType.Supervised else True dataset["mean"] = ts.mean() dataset["stddev"] = ts.std(axis=1).mean() - dataset["trend"] = config.get(BASE_OSCILLATION, {}).get(PARAMETERS.TREND, {}).get(PARAMETERS.KIND, np.NAN) - dataset["period_size"] = TimeEvalAddOn._calc_period_size(config.get(BASE_OSCILLATION, config.get(BASE_OSCILLATIONS, [{}])), dataset[PARAMETERS.LENGTH]) + dataset["trend"] = ( + config.get(BASE_OSCILLATION, {}) + .get(PARAMETERS.TREND, {}) + .get(PARAMETERS.KIND, np.NAN) + ) + dataset["period_size"] = TimeEvalAddOn._calc_period_size( + config.get(BASE_OSCILLATION, config.get(BASE_OSCILLATIONS, [{}])), + dataset[PARAMETERS.LENGTH], + ) return dataset @staticmethod @@ -123,13 +147,18 @@ def _set_global_vals(df: pd.DataFrame) -> None: @staticmethod def _calc_contamination(anomalies: Iterable[Dict], ts_length: int) -> float: - anomaly_lengths = [anomaly.get(PARAMETERS.LENGTH, default_values[ANOMALIES][PARAMETERS.LENGTH]) for anomaly in anomalies] + anomaly_lengths = [ + anomaly.get(PARAMETERS.LENGTH, default_values[ANOMALIES][PARAMETERS.LENGTH]) + for anomaly in anomalies + ] if len(anomaly_lengths) > 0: return sum(anomaly_lengths) / ts_length return 0 @staticmethod - def _calc_period_size(base: Union[Dict[str, Any], List[Dict[str, Any]]], length: int) -> float: + def _calc_period_size( + base: Union[Dict[str, Any], List[Dict[str, Any]]], length: int + ) -> float: bases: List[Dict[str, Any]] = [] if type(base) == dict: bases.append(base) # type: ignore # does not understand the condition before @@ -141,20 +170,39 @@ def _calc_period_size(base: Union[Dict[str, Any], List[Dict[str, Any]]], length: for dim in bases: frequency = dim.get(PARAMETERS.FREQUENCY) kind = dim.get(PARAMETERS.KIND) - if frequency is None or kind not in [BASE_OSCILLATION_NAMES.SINE, BASE_OSCILLATION_NAMES.COSINE, - BASE_OSCILLATION_NAMES.ECG, BASE_OSCILLATION_NAMES.RANDOM_MODE_JUMP, - BASE_OSCILLATION_NAMES.SQUARE, BASE_OSCILLATION_NAMES.SAWTOOTH, - BASE_OSCILLATION_NAMES.DIRICHLET, BASE_OSCILLATION_NAMES.MLS]: + if frequency is None or kind not in [ + BASE_OSCILLATION_NAMES.SINE, + BASE_OSCILLATION_NAMES.COSINE, + BASE_OSCILLATION_NAMES.ECG, + BASE_OSCILLATION_NAMES.RANDOM_MODE_JUMP, + BASE_OSCILLATION_NAMES.SQUARE, + BASE_OSCILLATION_NAMES.SAWTOOTH, + BASE_OSCILLATION_NAMES.DIRICHLET, + BASE_OSCILLATION_NAMES.MLS, + ]: periods.append(np.NAN) - elif kind in [BASE_OSCILLATION_NAMES.SINE, BASE_OSCILLATION_NAMES.COSINE, BASE_OSCILLATION_NAMES.ECG, - BASE_OSCILLATION_NAMES.SQUARE, BASE_OSCILLATION_NAMES.SAWTOOTH]: + elif kind in [ + BASE_OSCILLATION_NAMES.SINE, + BASE_OSCILLATION_NAMES.COSINE, + BASE_OSCILLATION_NAMES.ECG, + BASE_OSCILLATION_NAMES.SQUARE, + BASE_OSCILLATION_NAMES.SAWTOOTH, + ]: periods.append(calc_period_length(frequency)) elif kind == BASE_OSCILLATION_NAMES.DIRICHLET: - periodicity = dim.get(PARAMETERS.PERIODICITY, default_values[BASE_OSCILLATIONS][PARAMETERS.PERIODICITY]) - periods.append(calc_period_length(frequency) * int((periodicity % 2 == 0) + 1)) + periodicity = dim.get( + PARAMETERS.PERIODICITY, + default_values[BASE_OSCILLATIONS][PARAMETERS.PERIODICITY], + ) + periods.append( + calc_period_length(frequency) * int((periodicity % 2 == 0) + 1) 
+ ) elif kind == BASE_OSCILLATION_NAMES.MLS: - complexity = dim.get(PARAMETERS.COMPLEXITY, default_values[BASE_OSCILLATIONS][PARAMETERS.COMPLEXITY]) - periods.append(2 ** complexity - 1) + complexity = dim.get( + PARAMETERS.COMPLEXITY, + default_values[BASE_OSCILLATIONS][PARAMETERS.COMPLEXITY], + ) + periods.append(2**complexity - 1) elif kind == BASE_OSCILLATION_NAMES.RANDOM_MODE_JUMP: periods.append(int(length / frequency)) return float(np.nanmedian(periods)) diff --git a/gutenTAG/anomalies/__init__.py b/gutenTAG/anomalies/__init__.py index c5d0dc3..672a0be 100644 --- a/gutenTAG/anomalies/__init__.py +++ b/gutenTAG/anomalies/__init__.py @@ -31,12 +31,14 @@ class Anomaly: """This class acts like a generator graph, that collects all options in the beginning and, while being injected to a BaseOscillation, performs the changes.""" - def __init__(self, - position: Position, - exact_position: Optional[int], - anomaly_length: int, - channel: int = 0, - creeping_length: int = 0): + def __init__( + self, + position: Position, + exact_position: Optional[int], + anomaly_length: int, + channel: int = 0, + creeping_length: int = 0, + ): self.position = position self.exact_position = exact_position self.anomaly_length = anomaly_length @@ -48,7 +50,10 @@ def __init__(self, def set_anomaly(self, anomaly_kind: BaseAnomaly) -> Anomaly: self.anomaly_kinds.append(anomaly_kind) - self._requires_period_start_position = self._requires_period_start_position or anomaly_kind.requires_period_start_position + self._requires_period_start_position = ( + self._requires_period_start_position + or anomaly_kind.requires_period_start_position + ) return self def generate(self, ctx: AnomalyGenerationContext) -> AnomalyProtocol: @@ -59,32 +64,45 @@ def generate(self, ctx: AnomalyGenerationContext) -> AnomalyProtocol: length = end - start label_range = LabelRange(start, length) - protocol = AnomalyProtocol(start, end, self.channel, ctx, label_range, creeping_length=self.creeping_length) + protocol = AnomalyProtocol( + start, + end, + self.channel, + ctx, + label_range, + creeping_length=self.creeping_length, + ) for anomaly in self.anomaly_kinds: protocol = anomaly.generate(protocol) return protocol - def _find_position(self, ctx: AnomalyGenerationContext, max_tries: int = 50) -> Tuple[int, int]: + def _find_position( + self, ctx: AnomalyGenerationContext, max_tries: int = 50 + ) -> Tuple[int, int]: n_tries = max_tries while n_tries > 0: pos = self._get_random_position(ctx) n_tries -= 1 - if pos[1] < ctx.base_oscillation.length and not self._has_collision(pos, ctx.previous_anomaly_positions): + if pos[1] < ctx.base_oscillation.length and not self._has_collision( + pos, ctx.previous_anomaly_positions + ): return pos - raise ValueError(f"Giving up on finding a position for {self.anomaly_length}-point anomaly at {self.position} " - f"in channel {self.channel}! Maximum number of retries ({max_tries}) exceeded!") + raise ValueError( + f"Giving up on finding a position for {self.anomaly_length}-point anomaly at {self.position} " + f"in channel {self.channel}! Maximum number of retries ({max_tries}) exceeded!" 
+ ) def _get_random_position(self, ctx: AnomalyGenerationContext) -> Tuple[int, int]: timeseries_periods = ctx.timeseries_periods period_size = ctx.timeseries_period_size if ( - not self._requires_period_start_position - or timeseries_periods is None - or timeseries_periods <= 6 - or period_size is None - or period_size <= 2 + not self._requires_period_start_position + or timeseries_periods is None + or timeseries_periods <= 6 + or period_size is None + or period_size <= 2 ): return self._get_random_position_no_periodicity(ctx) @@ -94,11 +112,13 @@ def _get_random_position(self, ctx: AnomalyGenerationContext) -> Tuple[int, int] start_period = ctx.rng.choice(list(range(periods_per_section))) position = self.position.id - start = period_size * (position*periods_per_section + start_period) + start = period_size * (position * periods_per_section + start_period) end = start + self.anomaly_length return start, end - def _get_random_position_no_periodicity(self, ctx: AnomalyGenerationContext) -> Tuple[int, int]: + def _get_random_position_no_periodicity( + self, ctx: AnomalyGenerationContext + ) -> Tuple[int, int]: timeseries_length = ctx.base_oscillation.length section_size = timeseries_length // 3 position_in_section = ctx.rng.choice(np.arange(section_size)) @@ -109,10 +129,16 @@ def _get_random_position_no_periodicity(self, ctx: AnomalyGenerationContext) -> return start, end @staticmethod - def _has_collision(current_pos: Tuple[int, int], other_pos: List[Tuple[int, int]]) -> bool: + def _has_collision( + current_pos: Tuple[int, int], other_pos: List[Tuple[int, int]] + ) -> bool: if len(other_pos) == 0: return False others = np.array(other_pos) - start_collision = (others[:, 0] <= current_pos[0]) & (current_pos[0] <= others[:, 1]) - end_colllision = (others[:, 0] <= current_pos[1]) & (current_pos[1] <= others[:, 1]) + start_collision = (others[:, 0] <= current_pos[0]) & ( + current_pos[0] <= others[:, 1] + ) + end_colllision = (others[:, 0] <= current_pos[1]) & ( + current_pos[1] <= others[:, 1] + ) return bool(np.any(start_collision | end_colllision)) diff --git a/gutenTAG/anomalies/types/__init__.py b/gutenTAG/anomalies/types/__init__.py index 3942e68..4c4d8a4 100644 --- a/gutenTAG/anomalies/types/__init__.py +++ b/gutenTAG/anomalies/types/__init__.py @@ -29,7 +29,7 @@ def rng(self) -> np.random.Generator: return self.ctx.rng @property - def base_oscillation(self) -> 'BaseOscillationInterface': # type: ignore # otherwise we have a circular import + def base_oscillation(self) -> "BaseOscillationInterface": # type: ignore # noqa: F821 # otherwise we have a circular import return self.ctx.base_oscillation @property @@ -58,16 +58,28 @@ def generate(self, anomaly_protocol: AnomalyProtocol) -> AnomalyProtocol: def requires_period_start_position(self) -> bool: return False - def generate_creeping(self, anomaly_protocol: AnomalyProtocol, custom_anomaly_length: Optional[int] = None) -> np.ndarray: + def generate_creeping( + self, + anomaly_protocol: AnomalyProtocol, + custom_anomaly_length: Optional[int] = None, + ) -> np.ndarray: creeping_length = anomaly_protocol.creeping_length - anomaly_length = anomaly_protocol.length_without_creeping if custom_anomaly_length is None else custom_anomaly_length - return np.concatenate([ - np.linspace(0, 1, creeping_length), # creep - np.ones(anomaly_length) # anomaly - ]) + anomaly_length = ( + anomaly_protocol.length_without_creeping + if custom_anomaly_length is None + else custom_anomaly_length + ) + return np.concatenate( + [ + np.linspace(0, 1, 
creeping_length), # creep + np.ones(anomaly_length), # anomaly + ] + ) def turn_off_trend(self, anomaly_protocol): - anomaly_protocol.base_oscillation.trend_series[anomaly_protocol.start:anomaly_protocol.end] = 0 + anomaly_protocol.base_oscillation.trend_series[ + anomaly_protocol.start : anomaly_protocol.end + ] = 0 @staticmethod @abstractmethod diff --git a/gutenTAG/anomalies/types/amplitude.py b/gutenTAG/anomalies/types/amplitude.py index aad4160..d2d121f 100644 --- a/gutenTAG/anomalies/types/amplitude.py +++ b/gutenTAG/anomalies/types/amplitude.py @@ -20,29 +20,62 @@ def __init__(self, parameters: AnomalyAmplitudeParameters): self.amplitude_factor = parameters.amplitude_factor def generate(self, anomaly_protocol: AnomalyProtocol) -> AnomalyProtocol: - if anomaly_protocol.base_oscillation_kind in [Polynomial.KIND, Formula.KIND, RandomModeJump.KIND]: - self.logger.warn_false_combination(self.__class__.__name__, anomaly_protocol.base_oscillation_kind) + if anomaly_protocol.base_oscillation_kind in [ + Polynomial.KIND, + Formula.KIND, + RandomModeJump.KIND, + ]: + self.logger.warn_false_combination( + self.__class__.__name__, anomaly_protocol.base_oscillation_kind + ) return anomaly_protocol length = anomaly_protocol.end - anomaly_protocol.start if anomaly_protocol.creeping_length == 0: transition_length = int(length * 0.2) plateau = np.ones(int(length * 0.6)) - start_transition = norm.pdf(np.linspace(-3, 0, transition_length), scale=1.05) + start_transition = norm.pdf( + np.linspace(-3, 0, transition_length), scale=1.05 + ) end_transition = norm.pdf(np.linspace(0, 3, transition_length), scale=1.05) - amplitude_bell = np.concatenate([start_transition / start_transition.max(), plateau, end_transition / end_transition.max()]) + amplitude_bell = np.concatenate( + [ + start_transition / start_transition.max(), + plateau, + end_transition / end_transition.max(), + ] + ) else: anomaly_length = length - anomaly_protocol.creeping_length - creeping = self.generate_creeping(anomaly_protocol, custom_anomaly_length=int(anomaly_length * 0.8)) - end_transition = norm.pdf(np.linspace(0, 3, int(anomaly_length * 0.2)), scale=1.05) - amplitude_bell = np.concatenate([creeping, end_transition / end_transition.max()]) + creeping = self.generate_creeping( + anomaly_protocol, custom_anomaly_length=int(anomaly_length * 0.8) + ) + end_transition = norm.pdf( + np.linspace(0, 3, int(anomaly_length * 0.2)), scale=1.05 + ) + amplitude_bell = np.concatenate( + [creeping, end_transition / end_transition.max()] + ) if self.amplitude_factor < 1.0: - amplitude_bell = MinMaxScaler(feature_range=(1.0, 2.0 - self.amplitude_factor)).fit_transform(amplitude_bell.reshape(-1, 1)).reshape(-1) + amplitude_bell = ( + MinMaxScaler(feature_range=(1.0, 2.0 - self.amplitude_factor)) + .fit_transform(amplitude_bell.reshape(-1, 1)) + .reshape(-1) + ) amplitude_bell = amplitude_bell * -1 + 2 else: - amplitude_bell = MinMaxScaler(feature_range=(1.0, self.amplitude_factor)).fit_transform(amplitude_bell.reshape(-1, 1)).reshape(-1) + amplitude_bell = ( + MinMaxScaler(feature_range=(1.0, self.amplitude_factor)) + .fit_transform(amplitude_bell.reshape(-1, 1)) + .reshape(-1) + ) - subsequence = anomaly_protocol.base_oscillation.timeseries[anomaly_protocol.start:anomaly_protocol.end] * amplitude_bell + subsequence = ( + anomaly_protocol.base_oscillation.timeseries[ + anomaly_protocol.start : anomaly_protocol.end + ] + * amplitude_bell + ) anomaly_protocol.subsequences.append(subsequence) return anomaly_protocol diff --git 
a/gutenTAG/anomalies/types/extremum.py b/gutenTAG/anomalies/types/extremum.py index b9b16bf..fead6c1 100644 --- a/gutenTAG/anomalies/types/extremum.py +++ b/gutenTAG/anomalies/types/extremum.py @@ -24,19 +24,25 @@ def __init__(self, parameters: AnomalyExtremumParameters): def generate(self, anomaly_protocol: AnomalyProtocol) -> AnomalyProtocol: if anomaly_protocol.base_oscillation_kind == RandomModeJump.KIND: - self.logger.warn_false_combination(self.__class__.__name__, anomaly_protocol.base_oscillation_kind) + self.logger.warn_false_combination( + self.__class__.__name__, anomaly_protocol.base_oscillation_kind + ) return anomaly_protocol length = anomaly_protocol.end - anomaly_protocol.start if length != 1: - self.logger.logger.warn(f"Extremum anomaly can only have a length of 1 (was set to {length})! Ignoring.") + self.logger.logger.warn( + f"Extremum anomaly can only have a length of 1 (was set to {length})! Ignoring." + ) anomaly_protocol.end = anomaly_protocol.start + 1 anomaly_protocol.labels.length = 1 base: np.ndarray = anomaly_protocol.base_oscillation.timeseries if self.local: - context_start = max(anomaly_protocol.start - self.context_window//2, 0) - context_end = min(anomaly_protocol.end + self.context_window//2, base.shape[0]) + context_start = max(anomaly_protocol.start - self.context_window // 2, 0) + context_end = min( + anomaly_protocol.end + self.context_window // 2, base.shape[0] + ) context = base[context_start:context_end] diff = context.max() - context.min() extremum = anomaly_protocol.rng.random() * diff @@ -46,8 +52,8 @@ def generate(self, anomaly_protocol: AnomalyProtocol) -> AnomalyProtocol: # let extremum be significant enough to be distinguishable from noise max_noise: float = np.max(np.abs(anomaly_protocol.base_oscillation.noise)) - if extremum < 2*max_noise: - extremum += 2*max_noise + if extremum < 2 * max_noise: + extremum += 2 * max_noise if self.min: value = base[anomaly_protocol.start] - extremum diff --git a/gutenTAG/anomalies/types/frequency.py b/gutenTAG/anomalies/types/frequency.py index 8672226..3fa1a7c 100644 --- a/gutenTAG/anomalies/types/frequency.py +++ b/gutenTAG/anomalies/types/frequency.py @@ -20,8 +20,8 @@ def generate(self, anomaly_protocol: AnomalyProtocol) -> AnomalyProtocol: ecg = anomaly_protocol.base_oscillation subsequence = ecg.generate_only_base( anomaly_protocol.ctx.to_bo(), - frequency=ecg.frequency * self.frequency_factor - )[anomaly_protocol.start:anomaly_protocol.end] + frequency=ecg.frequency * self.frequency_factor, + )[anomaly_protocol.start : anomaly_protocol.end] anomaly_protocol.subsequences.append(subsequence) elif anomaly_protocol.base_oscillation.is_periodic(): @@ -31,12 +31,14 @@ def generate(self, anomaly_protocol: AnomalyProtocol) -> AnomalyProtocol: anomaly_protocol.ctx.to_bo(), length=length, frequency=bo.frequency * self.frequency_factor, - freq_mod=bo.freq_mod + freq_mod=bo.freq_mod, ) anomaly_protocol.subsequences.append(subsequence) else: - self.logger.warn_false_combination(self.__class__.__name__, anomaly_protocol.base_oscillation_kind) + self.logger.warn_false_combination( + self.__class__.__name__, anomaly_protocol.base_oscillation_kind + ) return anomaly_protocol diff --git a/gutenTAG/anomalies/types/kind.py b/gutenTAG/anomalies/types/kind.py index 392d8b8..1c29f8a 100644 --- a/gutenTAG/anomalies/types/kind.py +++ b/gutenTAG/anomalies/types/kind.py @@ -1,5 +1,5 @@ from enum import Enum -from typing import Dict +from typing import Any, Dict from .amplitude import AnomalyAmplitude from .extremum import 
AnomalyExtremum @@ -27,31 +27,35 @@ class AnomalyKind(Enum): Trend = ANOMALY_TYPE_NAMES.TREND ModeCorrelation = ANOMALY_TYPE_NAMES.MODE_CORRELATION - def create(self, parameters: Dict) -> BaseAnomaly: + def create(self, parameters: Dict[str, Any]) -> BaseAnomaly: # noqa: C901 if self == AnomalyKind.Platform: - anomaly: BaseAnomaly = AnomalyPlatform(AnomalyPlatform.get_parameter_class()(**parameters)) + return self._instantiate_anomaly(AnomalyPlatform, parameters) elif self == AnomalyKind.Frequency: - anomaly = AnomalyFrequency(AnomalyFrequency.get_parameter_class()(**parameters)) + return self._instantiate_anomaly(AnomalyFrequency, parameters) elif self == AnomalyKind.Extremum: - anomaly = AnomalyExtremum(AnomalyExtremum.get_parameter_class()(**parameters)) + return self._instantiate_anomaly(AnomalyExtremum, parameters) elif self == AnomalyKind.Variance: - anomaly = AnomalyVariance(AnomalyVariance.get_parameter_class()(**parameters)) + return self._instantiate_anomaly(AnomalyVariance, parameters) elif self == AnomalyKind.Mean: - anomaly = AnomalyMean(AnomalyMean.get_parameter_class()(**parameters)) + return self._instantiate_anomaly(AnomalyMean, parameters) elif self == AnomalyKind.Pattern: - anomaly = AnomalyPattern(AnomalyPattern.get_parameter_class()(**parameters)) + return self._instantiate_anomaly(AnomalyPattern, parameters) elif self == AnomalyKind.PatternShift: - anomaly = AnomalyPatternShift(AnomalyPatternShift.get_parameter_class()(**parameters)) + return self._instantiate_anomaly(AnomalyPatternShift, parameters) elif self == AnomalyKind.Amplitude: - anomaly = AnomalyAmplitude(AnomalyAmplitude.get_parameter_class()(**parameters)) + return self._instantiate_anomaly(AnomalyAmplitude, parameters) elif self == AnomalyKind.Trend: - anomaly = AnomalyTrend(AnomalyTrend.get_parameter_class()(**parameters)) + return self._instantiate_anomaly(AnomalyTrend, parameters) elif self == AnomalyKind.ModeCorrelation: - anomaly = AnomalyModeCorrelation(AnomalyModeCorrelation.get_parameter_class()()) + return self._instantiate_anomaly(AnomalyModeCorrelation, parameters) else: - raise ValueError(f"AnomalyKind {self.value} is not supported, yet! Guten Tag!") + raise ValueError( + f"AnomalyKind {self.value} is not supported, yet! Guten Tag!" 
+ ) - return anomaly + @staticmethod + def _instantiate_anomaly(cls, parameters: Dict[str, Any]) -> BaseAnomaly: + return cls(cls.get_parameter_class()(**parameters)) @classmethod def has_value(cls, v: str) -> bool: diff --git a/gutenTAG/anomalies/types/mean.py b/gutenTAG/anomalies/types/mean.py index e19ffe7..b4affb0 100644 --- a/gutenTAG/anomalies/types/mean.py +++ b/gutenTAG/anomalies/types/mean.py @@ -20,13 +20,17 @@ def __init__(self, parameters: AnomalyMeanParameters): def generate(self, anomaly_protocol: AnomalyProtocol) -> AnomalyProtocol: if anomaly_protocol.base_oscillation_kind == RandomModeJump.KIND: - self.logger.warn_false_combination(self.__class__.__name__, anomaly_protocol.base_oscillation_kind) + self.logger.warn_false_combination( + self.__class__.__name__, anomaly_protocol.base_oscillation_kind + ) return anomaly_protocol base = anomaly_protocol.base_oscillation ts: np.ndarray = base.timeseries creeping = self.generate_creeping(anomaly_protocol) - subsequence = ts[anomaly_protocol.start:anomaly_protocol.end] + self.offset * creeping + subsequence = ( + ts[anomaly_protocol.start : anomaly_protocol.end] + self.offset * creeping + ) anomaly_protocol.subsequences.append(subsequence) return anomaly_protocol diff --git a/gutenTAG/anomalies/types/mode_correlation.py b/gutenTAG/anomalies/types/mode_correlation.py index ac6c279..72c788f 100644 --- a/gutenTAG/anomalies/types/mode_correlation.py +++ b/gutenTAG/anomalies/types/mode_correlation.py @@ -18,11 +18,15 @@ def __init__(self, parameters: AnomalyModeCorrelationParameters): def generate(self, anomaly_protocol: AnomalyProtocol) -> AnomalyProtocol: if anomaly_protocol.base_oscillation_kind == RandomModeJump.KIND: timeseries = anomaly_protocol.base_oscillation.timeseries - subsequence = timeseries[anomaly_protocol.start:anomaly_protocol.end] * -1 + subsequence = timeseries[anomaly_protocol.start : anomaly_protocol.end] * -1 anomaly_protocol.subsequences.append(subsequence) else: - self.logger.warn_false_combination(self.__class__.__name__, anomaly_protocol.base_oscillation_kind) - self.logger.warning("A `mode_correlation` anomaly can be injected in only a `random_mode_jump` base oscillation!") + self.logger.warn_false_combination( + self.__class__.__name__, anomaly_protocol.base_oscillation_kind + ) + self.logger.warning( + "A `mode_correlation` anomaly can be injected in only a `random_mode_jump` base oscillation!" 
+ ) return anomaly_protocol @property diff --git a/gutenTAG/anomalies/types/pattern.py b/gutenTAG/anomalies/types/pattern.py index f994851..0348d41 100644 --- a/gutenTAG/anomalies/types/pattern.py +++ b/gutenTAG/anomalies/types/pattern.py @@ -30,8 +30,9 @@ def generate(self, anomaly_protocol: AnomalyProtocol) -> AnomalyProtocol: cbf = anomaly_protocol.base_oscillation subsequence = cbf.generate_only_base( anomaly_protocol.ctx.to_bo(), - variance_pattern_length=cbf.variance_pattern_length * self.cbf_pattern_factor - )[anomaly_protocol.start:anomaly_protocol.end] + variance_pattern_length=cbf.variance_pattern_length + * self.cbf_pattern_factor, + )[anomaly_protocol.start : anomaly_protocol.end] anomaly_protocol.subsequences.append(subsequence) elif anomaly_protocol.base_oscillation_kind == ECG.KIND: @@ -40,61 +41,82 @@ def generate(self, anomaly_protocol: AnomalyProtocol) -> AnomalyProtocol: window = int(length * 0.05) for slide in range(-3, 3): - start = ecg.timeseries[anomaly_protocol.start + slide:anomaly_protocol.start + window] + start = ecg.timeseries[ + anomaly_protocol.start + slide : anomaly_protocol.start + window + ] if np.argmax(start) == 0: break else: slide = 0 - subsequence = ecg.timeseries[anomaly_protocol.start + slide:anomaly_protocol.end + slide][::-1] + subsequence = ecg.timeseries[ + anomaly_protocol.start + slide : anomaly_protocol.end + slide + ][::-1] anomaly_protocol.subsequences.append(subsequence) elif anomaly_protocol.base_oscillation_kind == Sawtooth.KIND: subsequence = anomaly_protocol.base_oscillation.generate_only_base( - anomaly_protocol.ctx.to_bo(), - width=self.sawtooth_width - )[anomaly_protocol.start:anomaly_protocol.end] + anomaly_protocol.ctx.to_bo(), width=self.sawtooth_width + )[anomaly_protocol.start : anomaly_protocol.end] anomaly_protocol.subsequences.append(subsequence) elif anomaly_protocol.base_oscillation_kind == Square.KIND: subsequence = anomaly_protocol.base_oscillation.generate_only_base( - anomaly_protocol.ctx.to_bo(), - duty=self.square_duty - )[anomaly_protocol.start:anomaly_protocol.end] + anomaly_protocol.ctx.to_bo(), duty=self.square_duty + )[anomaly_protocol.start : anomaly_protocol.end] anomaly_protocol.subsequences.append(subsequence) elif anomaly_protocol.base_oscillation_kind == MLS.KIND: transition_window = int(0.1 * anomaly_protocol.length) transition_window = transition_window - transition_window % 2 - subsequence = anomaly_protocol.base_oscillation.timeseries[anomaly_protocol.start:anomaly_protocol.end] + subsequence = anomaly_protocol.base_oscillation.timeseries[ + anomaly_protocol.start : anomaly_protocol.end + ] reversed = subsequence[::-1] - transition_start = np.interp(np.linspace(0, transition_window*2, transition_window), - np.arange(transition_window*2), - np.r_[subsequence[:transition_window], reversed[:transition_window]]) - transition_end = np.interp(np.linspace(0, transition_window*2, transition_window), - np.arange(transition_window*2), - np.r_[reversed[-transition_window:], subsequence[-transition_window:]]) - - subsequence = np.concatenate([ - transition_start, - reversed[transition_window:-transition_window], - transition_end - ]) + transition_start = np.interp( + np.linspace(0, transition_window * 2, transition_window), + np.arange(transition_window * 2), + np.r_[subsequence[:transition_window], reversed[:transition_window]], + ) + transition_end = np.interp( + np.linspace(0, transition_window * 2, transition_window), + np.arange(transition_window * 2), + np.r_[reversed[-transition_window:], 
subsequence[-transition_window:]], + ) + + subsequence = np.concatenate( + [ + transition_start, + reversed[transition_window:-transition_window], + transition_end, + ] + ) anomaly_protocol.subsequences.append(subsequence) elif anomaly_protocol.base_oscillation.is_periodic(): - def sinusoid(t: np.ndarray, k: float, a_min: float, a_max: float) -> np.ndarray: + + def sinusoid( + t: np.ndarray, k: float, a_min: float, a_max: float + ) -> np.ndarray: pattern = np.arctan(k * t) / np.arctan(k) - scaled = MinMaxScaler(feature_range=(a_min, a_max)).fit_transform(pattern.reshape(-1, 1)).reshape(-1) + scaled = ( + MinMaxScaler(feature_range=(a_min, a_max)) + .fit_transform(pattern.reshape(-1, 1)) + .reshape(-1) + ) return scaled bo = anomaly_protocol.base_oscillation - snippet = bo.timeseries[anomaly_protocol.start:anomaly_protocol.end] - subsequence = sinusoid(snippet, self.sinusoid_k, snippet.min(), snippet.max()) + snippet = bo.timeseries[anomaly_protocol.start : anomaly_protocol.end] + subsequence = sinusoid( + snippet, self.sinusoid_k, snippet.min(), snippet.max() + ) anomaly_protocol.subsequences.append(subsequence) else: - self.logger.warn_false_combination(self.__class__.__name__, anomaly_protocol.base_oscillation_kind) + self.logger.warn_false_combination( + self.__class__.__name__, anomaly_protocol.base_oscillation_kind + ) return anomaly_protocol @property diff --git a/gutenTAG/anomalies/types/pattern_shift.py b/gutenTAG/anomalies/types/pattern_shift.py index 86c6ff7..fef6d74 100644 --- a/gutenTAG/anomalies/types/pattern_shift.py +++ b/gutenTAG/anomalies/types/pattern_shift.py @@ -5,7 +5,6 @@ from . import BaseAnomaly from .. import AnomalyProtocol -from ...base_oscillations import ECG, Sine, Cosine @dataclass @@ -22,27 +21,36 @@ def __init__(self, parameters: AnomalyPatternShiftParameters): def generate(self, anomaly_protocol: AnomalyProtocol) -> AnomalyProtocol: if anomaly_protocol.base_oscillation.is_periodic(): - assert abs(self.shift_by) <= self.transition_window, \ - "The parameter 'shift_by' must not be larger than 'transition_window' in absolute terms! Guten Tag!" + assert ( + abs(self.shift_by) <= self.transition_window + ), "The parameter 'shift_by' must not be larger than 'transition_window' in absolute terms! Guten Tag!" 
base = anomaly_protocol.base_oscillation - subsequence = base.timeseries[anomaly_protocol.start:anomaly_protocol.end] - transition_start = np.interp(np.linspace(0, self.transition_window, self.transition_window + self.shift_by), - np.arange(self.transition_window), subsequence[:self.transition_window]) - shifted = subsequence[self.transition_window:-self.transition_window] - transition_end = np.interp(np.linspace(0, self.transition_window, self.transition_window - self.shift_by), - np.arange(self.transition_window), subsequence[-self.transition_window:]) - - subsequence = np.concatenate([ - transition_start, - shifted, - transition_end - ]) + subsequence = base.timeseries[anomaly_protocol.start : anomaly_protocol.end] + transition_start = np.interp( + np.linspace( + 0, self.transition_window, self.transition_window + self.shift_by + ), + np.arange(self.transition_window), + subsequence[: self.transition_window], + ) + shifted = subsequence[self.transition_window : -self.transition_window] + transition_end = np.interp( + np.linspace( + 0, self.transition_window, self.transition_window - self.shift_by + ), + np.arange(self.transition_window), + subsequence[-self.transition_window :], + ) + + subsequence = np.concatenate([transition_start, shifted, transition_end]) anomaly_protocol.subsequences.append(subsequence) else: - self.logger.warn_false_combination(self.__class__.__name__, anomaly_protocol.base_oscillation_kind) + self.logger.warn_false_combination( + self.__class__.__name__, anomaly_protocol.base_oscillation_kind + ) return anomaly_protocol @property diff --git a/gutenTAG/anomalies/types/platform.py b/gutenTAG/anomalies/types/platform.py index 1dfa111..7a64a0f 100644 --- a/gutenTAG/anomalies/types/platform.py +++ b/gutenTAG/anomalies/types/platform.py @@ -19,7 +19,9 @@ def __init__(self, parameters: AnomalyPlatformParameters): def generate(self, anomaly_protocol: AnomalyProtocol) -> AnomalyProtocol: if anomaly_protocol.base_oscillation_kind == RandomModeJump.KIND: - self.logger.warn_false_combination(self.__class__.__name__, anomaly_protocol.base_oscillation_kind) + self.logger.warn_false_combination( + self.__class__.__name__, anomaly_protocol.base_oscillation_kind + ) return anomaly_protocol length = anomaly_protocol.end - anomaly_protocol.start diff --git a/gutenTAG/anomalies/types/trend.py b/gutenTAG/anomalies/types/trend.py index be26472..4efb23e 100644 --- a/gutenTAG/anomalies/types/trend.py +++ b/gutenTAG/anomalies/types/trend.py @@ -11,7 +11,7 @@ @dataclass class AnomalyTrendParameters: - trend: 'BaseOscillationInterface' # type: ignore # otherwise we have a circular import + trend: "BaseOscillationInterface" # type: ignore # noqa: F821 # otherwise we have a circular import class AnomalyTrend(BaseAnomaly): @@ -21,15 +21,23 @@ def __init__(self, parameters: AnomalyTrendParameters): def generate(self, anomaly_protocol: AnomalyProtocol) -> AnomalyProtocol: if anomaly_protocol.base_oscillation_kind == RandomModeJump.KIND: - self.logger.warn_false_combination(self.__class__.__name__, anomaly_protocol.base_oscillation_kind) + self.logger.warn_false_combination( + self.__class__.__name__, anomaly_protocol.base_oscillation_kind + ) return anomaly_protocol length = anomaly_protocol.end - anomaly_protocol.start transition_length = int(length * 0.2) plateau_length = int(length * 0.8) start_transition = norm.pdf(np.linspace(-3, 0, transition_length), scale=1.05) - amplitude_bell = np.concatenate([start_transition / start_transition.max(), np.ones(plateau_length)]) - amplitude_bell = 
MinMaxScaler(feature_range=(0, 1)).fit_transform(amplitude_bell.reshape(-1, 1)).reshape(-1) + amplitude_bell = np.concatenate( + [start_transition / start_transition.max(), np.ones(plateau_length)] + ) + amplitude_bell = ( + MinMaxScaler(feature_range=(0, 1)) + .fit_transform(amplitude_bell.reshape(-1, 1)) + .reshape(-1) + ) self.trend.length = length self.trend.generate_timeseries_and_variations(anomaly_protocol.ctx.to_bo()) @@ -38,8 +46,12 @@ def generate(self, anomaly_protocol: AnomalyProtocol) -> AnomalyProtocol: timeseries *= amplitude_bell end_point = timeseries[-1] - anomaly_protocol.base_oscillation.trend_series[anomaly_protocol.start:anomaly_protocol.end] += timeseries - anomaly_protocol.base_oscillation.trend_series[anomaly_protocol.end:] += end_point + anomaly_protocol.base_oscillation.trend_series[ + anomaly_protocol.start : anomaly_protocol.end + ] += timeseries + anomaly_protocol.base_oscillation.trend_series[ + anomaly_protocol.end : + ] += end_point return anomaly_protocol diff --git a/gutenTAG/anomalies/types/variance.py b/gutenTAG/anomalies/types/variance.py index 06dc498..cca86bd 100644 --- a/gutenTAG/anomalies/types/variance.py +++ b/gutenTAG/anomalies/types/variance.py @@ -19,22 +19,31 @@ def __init__(self, parameters: AnomalyVarianceParameters): def generate(self, anomaly_protocol: AnomalyProtocol) -> AnomalyProtocol: base = anomaly_protocol.base_oscillation if anomaly_protocol.base_oscillation_kind == RandomModeJump.KIND: - self.logger.warn_false_combination(self.__class__.__name__, anomaly_protocol.base_oscillation_kind) + self.logger.warn_false_combination( + self.__class__.__name__, anomaly_protocol.base_oscillation_kind + ) elif anomaly_protocol.base_oscillation_kind == CylinderBellFunnel.KIND: subsequence = base.generate_only_base( - anomaly_protocol.ctx.to_bo(), - variance=self.variance - )[anomaly_protocol.start:anomaly_protocol.end] + anomaly_protocol.ctx.to_bo(), variance=self.variance + )[anomaly_protocol.start : anomaly_protocol.end] anomaly_protocol.subsequences.append(subsequence) else: length = anomaly_protocol.end - anomaly_protocol.start variance_diff = self.variance - base.variance - creeping = self.generate_creeping(anomaly_protocol) * variance_diff + base.variance # from 0 to variance_diff - creeping /= self.variance * base.amplitude # get relative transition from base variance to anomaly variance - subsequence_noise = base.generate_noise(anomaly_protocol.ctx.to_bo(), self.variance * base.amplitude, length) - base.noise[anomaly_protocol.start:anomaly_protocol.end] = subsequence_noise * creeping + creeping = ( + self.generate_creeping(anomaly_protocol) * variance_diff + base.variance + ) # from 0 to variance_diff + creeping /= ( + self.variance * base.amplitude + ) # get relative transition from base variance to anomaly variance + subsequence_noise = base.generate_noise( + anomaly_protocol.ctx.to_bo(), self.variance * base.amplitude, length + ) + base.noise[anomaly_protocol.start : anomaly_protocol.end] = ( + subsequence_noise * creeping + ) return anomaly_protocol @property diff --git a/gutenTAG/api/__init__.py b/gutenTAG/api/__init__.py index aab17ae..d098e34 100644 --- a/gutenTAG/api/__init__.py +++ b/gutenTAG/api/__init__.py @@ -1 +1,14 @@ -from .bo import * +from .bo import ( + cosine, + cylinder_bell_funnel, + dirichlet, + ecg, + formula, + mls, + polynomial, + random_mode_jump, + random_walk, + sawtooth, + sine, + square, +) diff --git a/gutenTAG/base_oscillations/cosine.py b/gutenTAG/base_oscillations/cosine.py index 20b8f41..c4c3c31 100644 
--- a/gutenTAG/base_oscillations/cosine.py +++ b/gutenTAG/base_oscillations/cosine.py @@ -4,9 +4,17 @@ from . import BaseOscillation from .interface import BaseOscillationInterface -from .utils.math_func_support import prepare_base_signal, generate_periodic_signal, calc_n_periods +from .utils.math_func_support import ( + prepare_base_signal, + generate_periodic_signal, + calc_n_periods, +) from ..utils.default_values import default_values -from ..utils.global_variables import BASE_OSCILLATION_NAMES, BASE_OSCILLATIONS, PARAMETERS +from ..utils.global_variables import ( + BASE_OSCILLATION_NAMES, + BASE_OSCILLATIONS, + PARAMETERS, +) from ..utils.types import BOGenerationContext @@ -19,13 +27,16 @@ def get_base_oscillation_kind(self) -> str: def get_timeseries_periods(self) -> Optional[int]: return calc_n_periods(self.length, self.frequency) - def generate_only_base(self, - ctx: BOGenerationContext, - length: Optional[int] = None, - frequency: Optional[float] = None, - amplitude: Optional[float] = None, - freq_mod: Optional[float] = None, - *args, **kwargs) -> np.ndarray: + def generate_only_base( + self, + ctx: BOGenerationContext, + length: Optional[int] = None, + frequency: Optional[float] = None, + amplitude: Optional[float] = None, + freq_mod: Optional[float] = None, + *args, + **kwargs + ) -> np.ndarray: n: int = length or self.length # in points f: float = frequency or self.frequency # in Hz a: float = amplitude or self.amplitude @@ -33,10 +44,12 @@ def generate_only_base(self, return cosine(n, f, a, v_freq_mod) -def cosine(length: int = default_values[BASE_OSCILLATIONS][PARAMETERS.LENGTH], - frequency: float = default_values[BASE_OSCILLATIONS][PARAMETERS.FREQUENCY], - amplitude: float = default_values[BASE_OSCILLATIONS][PARAMETERS.AMPLITUDE], - freq_mod: float = default_values[BASE_OSCILLATIONS][PARAMETERS.FREQ_MOD]) -> np.ndarray: +def cosine( + length: int = default_values[BASE_OSCILLATIONS][PARAMETERS.LENGTH], + frequency: float = default_values[BASE_OSCILLATIONS][PARAMETERS.FREQUENCY], + amplitude: float = default_values[BASE_OSCILLATIONS][PARAMETERS.AMPLITUDE], + freq_mod: float = default_values[BASE_OSCILLATIONS][PARAMETERS.FREQ_MOD], +) -> np.ndarray: base_ts = prepare_base_signal(length, frequency) return generate_periodic_signal(base_ts, np.cos, amplitude, freq_mod) diff --git a/gutenTAG/base_oscillations/custom_input.py b/gutenTAG/base_oscillations/custom_input.py index 232b665..3bd7c93 100644 --- a/gutenTAG/base_oscillations/custom_input.py +++ b/gutenTAG/base_oscillations/custom_input.py @@ -1,4 +1,4 @@ -from typing import Optional, List, Union +from typing import Optional, Union import numpy as np import pandas as pd @@ -102,7 +102,7 @@ def generate_only_base( ) col_type = df.dtypes[0] if col_type != np.float_: - df = df.astype(float) + df = df.astype(np.float_) warnings.warn( f"Input data was of {col_type} type and has been automatically converted to float." ) diff --git a/gutenTAG/base_oscillations/cylinder_bell_funnel.py b/gutenTAG/base_oscillations/cylinder_bell_funnel.py index 8b0bcc8..31f087c 100644 --- a/gutenTAG/base_oscillations/cylinder_bell_funnel.py +++ b/gutenTAG/base_oscillations/cylinder_bell_funnel.py @@ -5,7 +5,11 @@ from . 
import BaseOscillation from .interface import BaseOscillationInterface from ..utils.default_values import default_values -from ..utils.global_variables import BASE_OSCILLATION_NAMES, BASE_OSCILLATIONS, PARAMETERS +from ..utils.global_variables import ( + BASE_OSCILLATION_NAMES, + BASE_OSCILLATIONS, + PARAMETERS, +) from ..utils.types import BOGenerationContext @@ -24,17 +28,22 @@ def is_periodic(self) -> bool: """CylinderBellFunnel has reoccurring patterns but no fixed periodicity!""" return False - def generate_only_base(self, - ctx: BOGenerationContext, - length: Optional[int] = None, - variance: Optional[float] = None, - amplitude: Optional[float] = None, - variance_pattern_length: Optional[int] = None, - *args, **kwargs) -> np.ndarray: + def generate_only_base( + self, + ctx: BOGenerationContext, + length: Optional[int] = None, + variance: Optional[float] = None, + amplitude: Optional[float] = None, + variance_pattern_length: Optional[int] = None, + *args, + **kwargs + ) -> np.ndarray: length = length or self.length variance = variance or self.variance amplitude = amplitude or self.amplitude - variance_pattern_length = variance_pattern_length or self.variance_pattern_length + variance_pattern_length = ( + variance_pattern_length or self.variance_pattern_length + ) return cylinder_bell_funnel( ctx.rng, @@ -43,15 +52,19 @@ def generate_only_base(self, avg_amplitude=amplitude, default_variance=variance, variance_pattern_length=variance_pattern_length, - variance_amplitude=self.variance_amplitude + variance_amplitude=self.variance_amplitude, ) - def generate_timeseries_and_variations(self, ctx: BOGenerationContext, **kwargs) -> BaseOscillationInterface: + def generate_timeseries_and_variations( + self, ctx: BOGenerationContext, **kwargs + ) -> BaseOscillationInterface: super().generate_timeseries_and_variations(ctx) if self.timeseries is not None and self.noise is not None: self.timeseries -= self.noise else: - raise AssertionError("`timeseries` and `noise` are None. Please, generate `timeseries` and `noise` before calling this method!") + raise AssertionError( + "`timeseries` and `noise` are None. Please, generate `timeseries` and `noise` before calling this method!" 
+ ) return self @@ -60,14 +73,22 @@ def generate_timeseries_and_variations(self, ctx: BOGenerationContext, **kwargs) # Taken from https://github.com/KDD-OpenSource/data-generation/blob/master/generation/cbf.py # cylinder bell funnel based on "Learning comprehensible descriptions of multivariate time series" -def cylinder_bell_funnel(rng: np.random.Generator = np.random.default_rng(), - length: int = default_values[BASE_OSCILLATIONS][PARAMETERS.LENGTH], - avg_pattern_length: int = default_values[BASE_OSCILLATIONS][PARAMETERS.AVG_PATTERN_LENGTH], - avg_amplitude: float = default_values[BASE_OSCILLATIONS][PARAMETERS.AMPLITUDE], - default_variance: float = default_values[BASE_OSCILLATIONS][PARAMETERS.VARIANCE], - variance_pattern_length: int = default_values[BASE_OSCILLATIONS][PARAMETERS.VARIANCE_PATTERN_LENGTH], - variance_amplitude: float = default_values[BASE_OSCILLATIONS][PARAMETERS.VARIANCE_AMPLITUDE], - include_negatives: bool = True) -> np.ndarray: +def cylinder_bell_funnel( + rng: np.random.Generator = np.random.default_rng(), + length: int = default_values[BASE_OSCILLATIONS][PARAMETERS.LENGTH], + avg_pattern_length: int = default_values[BASE_OSCILLATIONS][ + PARAMETERS.AVG_PATTERN_LENGTH + ], + avg_amplitude: float = default_values[BASE_OSCILLATIONS][PARAMETERS.AMPLITUDE], + default_variance: float = default_values[BASE_OSCILLATIONS][PARAMETERS.VARIANCE], + variance_pattern_length: int = default_values[BASE_OSCILLATIONS][ + PARAMETERS.VARIANCE_PATTERN_LENGTH + ], + variance_amplitude: float = default_values[BASE_OSCILLATIONS][ + PARAMETERS.VARIANCE_AMPLITUDE + ], + include_negatives: bool = True, +) -> np.ndarray: def generate_bell(n: int, a: float, v: float) -> np.ndarray: bell = rng.normal(0, v, n) + a * np.arange(n) / n return bell @@ -80,10 +101,16 @@ def generate_cylinder(n: int, a: float, v: float) -> np.ndarray: cylinder = rng.normal(0, v, n) + a return cylinder - generators: Sequence[Callable[[int, float, float], np.ndarray]] = (generate_bell, generate_funnel, generate_cylinder) + generators: Sequence[Callable[[int, float, float], np.ndarray]] = ( + generate_bell, + generate_funnel, + generate_cylinder, + ) data = rng.normal(0, default_variance, length) current_start = rng.integers(0, avg_pattern_length) - current_length = max(1, int(np.ceil(rng.normal(avg_pattern_length, variance_pattern_length)))) + current_length = max( + 1, int(np.ceil(rng.normal(avg_pattern_length, variance_pattern_length))) + ) while current_start + current_length < length: generator: Callable[[int, float, float], np.ndarray] = rng.choice(generators) # type: ignore # strange numpy type prevents chosing a callable @@ -96,9 +123,13 @@ def generate_cylinder(n: int, a: float, v: float) -> np.ndarray: if include_negatives and rng.random() > 0.5: pattern = -1 * pattern - data[current_start: current_start + current_length] = pattern + data[current_start : current_start + current_length] = pattern - current_start = current_start + current_length + rng.integers(0, avg_pattern_length) - current_length = max(1, int(np.ceil(rng.normal(avg_pattern_length, variance_pattern_length)))) + current_start = ( + current_start + current_length + rng.integers(0, avg_pattern_length) + ) + current_length = max( + 1, int(np.ceil(rng.normal(avg_pattern_length, variance_pattern_length))) + ) return data diff --git a/gutenTAG/base_oscillations/dirichlet.py b/gutenTAG/base_oscillations/dirichlet.py index 5dfeec2..d5a8ee5 100644 --- a/gutenTAG/base_oscillations/dirichlet.py +++ b/gutenTAG/base_oscillations/dirichlet.py @@ -6,9 +6,17 
@@ from . import BaseOscillation from .interface import BaseOscillationInterface -from .utils.math_func_support import prepare_base_signal, generate_periodic_signal, calc_n_periods +from .utils.math_func_support import ( + prepare_base_signal, + generate_periodic_signal, + calc_n_periods, +) from ..utils.default_values import default_values -from ..utils.global_variables import BASE_OSCILLATION_NAMES, BASE_OSCILLATIONS, PARAMETERS +from ..utils.global_variables import ( + BASE_OSCILLATION_NAMES, + BASE_OSCILLATIONS, + PARAMETERS, +) from ..utils.types import BOGenerationContext @@ -19,16 +27,21 @@ def get_base_oscillation_kind(self) -> str: return self.KIND def get_timeseries_periods(self) -> Optional[int]: - return calc_n_periods(self.length, self.frequency) // ((self.periodicity % 2 == 0) + 1) - - def generate_only_base(self, - ctx: BOGenerationContext, - length: Optional[int] = None, - frequency: Optional[float] = None, - amplitude: Optional[float] = None, - freq_mod: Optional[float] = None, - periodicity: Optional[float] = None, - *args, **kwargs) -> np.ndarray: + return calc_n_periods(self.length, self.frequency) // ( + (self.periodicity % 2 == 0) + 1 + ) + + def generate_only_base( + self, + ctx: BOGenerationContext, + length: Optional[int] = None, + frequency: Optional[float] = None, + amplitude: Optional[float] = None, + freq_mod: Optional[float] = None, + periodicity: Optional[float] = None, + *args, + **kwargs + ) -> np.ndarray: n: int = length or self.length # in points f: float = frequency or self.frequency # in Hz a: float = amplitude or self.amplitude @@ -37,11 +50,15 @@ def generate_only_base(self, return dirichlet(n, f, a, p) -def dirichlet(length: int = default_values[BASE_OSCILLATIONS][PARAMETERS.LENGTH], - frequency: float = default_values[BASE_OSCILLATIONS][PARAMETERS.COMPLEXITY], - amplitude: float = default_values[BASE_OSCILLATIONS][PARAMETERS.AMPLITUDE], - periodicity: float = default_values[BASE_OSCILLATIONS][PARAMETERS.PERIODICITY]) -> np.ndarray: - assert periodicity > 1, "periodicity must be > 1, otherwise the dirichlet wave collapses" +def dirichlet( + length: int = default_values[BASE_OSCILLATIONS][PARAMETERS.LENGTH], + frequency: float = default_values[BASE_OSCILLATIONS][PARAMETERS.COMPLEXITY], + amplitude: float = default_values[BASE_OSCILLATIONS][PARAMETERS.AMPLITUDE], + periodicity: float = default_values[BASE_OSCILLATIONS][PARAMETERS.PERIODICITY], +) -> np.ndarray: + assert ( + periodicity > 1 + ), "periodicity must be > 1, otherwise the dirichlet wave collapses" base_ts = prepare_base_signal(length, frequency) func = partial(scipy.special.diric, n=periodicity) return generate_periodic_signal(base_ts, func, amplitude) diff --git a/gutenTAG/base_oscillations/ecg.py b/gutenTAG/base_oscillations/ecg.py index 66dfa9d..ad13355 100644 --- a/gutenTAG/base_oscillations/ecg.py +++ b/gutenTAG/base_oscillations/ecg.py @@ -6,7 +6,11 @@ from . 
import BaseOscillation from .interface import BaseOscillationInterface from ..utils.default_values import default_values -from ..utils.global_variables import BASE_OSCILLATION_NAMES, BASE_OSCILLATIONS, PARAMETERS +from ..utils.global_variables import ( + BASE_OSCILLATION_NAMES, + BASE_OSCILLATIONS, + PARAMETERS, +) from ..utils.types import BOGenerationContext @@ -21,14 +25,19 @@ def get_base_oscillation_kind(self) -> str: return self.KIND def get_timeseries_periods(self) -> Optional[int]: - return int((self.length // sampling_rate) * (self.frequency / 100 * sampling_rate)) - - def generate_only_base(self, - ctx: BOGenerationContext, - length: Optional[int] = None, - frequency: Optional[float] = None, - amplitude: Optional[float] = None, - *args, **kwargs) -> np.ndarray: + return int( + (self.length // sampling_rate) * (self.frequency / 100 * sampling_rate) + ) + + def generate_only_base( + self, + ctx: BOGenerationContext, + length: Optional[int] = None, + frequency: Optional[float] = None, + amplitude: Optional[float] = None, + *args, + **kwargs + ) -> np.ndarray: length = length or self.length frequency = frequency or self.frequency amplitude = amplitude or self.amplitude @@ -36,21 +45,25 @@ def generate_only_base(self, return ecg(ctx.rng, length, frequency, amplitude, self.ecg_sim_method) -def ecg(rng: np.random.Generator = np.random.default_rng(), - length: int = default_values[BASE_OSCILLATIONS][PARAMETERS.LENGTH], - frequency: float = default_values[BASE_OSCILLATIONS][PARAMETERS.FREQUENCY], - amplitude: float = default_values[BASE_OSCILLATIONS][PARAMETERS.AMPLITUDE], - ecg_sim_method: str = default_values[BASE_OSCILLATIONS][PARAMETERS.ECG_SIM_METHOD]) -> np.ndarray: +def ecg( + rng: np.random.Generator = np.random.default_rng(), + length: int = default_values[BASE_OSCILLATIONS][PARAMETERS.LENGTH], + frequency: float = default_values[BASE_OSCILLATIONS][PARAMETERS.FREQUENCY], + amplitude: float = default_values[BASE_OSCILLATIONS][PARAMETERS.AMPLITUDE], + ecg_sim_method: str = default_values[BASE_OSCILLATIONS][PARAMETERS.ECG_SIM_METHOD], +) -> np.ndarray: duration = length // sampling_rate # frequency = beats per 100 points = beats per second heart_rate = int(frequency / 100 * sampling_rate * 60) - ecg = nk.ecg_simulate(duration=duration, - sampling_rate=sampling_rate, - heart_rate=heart_rate, - length=length, - random_state=rng.integers(0, int(1e9)), - noise=0, - method=ecg_sim_method) + ecg = nk.ecg_simulate( + duration=duration, + sampling_rate=sampling_rate, + heart_rate=heart_rate, + length=length, + random_state=rng.integers(0, int(1e9)), + noise=0, + method=ecg_sim_method, + ) return ecg * amplitude diff --git a/gutenTAG/base_oscillations/formula.py b/gutenTAG/base_oscillations/formula.py index 478b39f..c861e2f 100644 --- a/gutenTAG/base_oscillations/formula.py +++ b/gutenTAG/base_oscillations/formula.py @@ -9,7 +9,11 @@ from . 
import BaseOscillation from .interface import BaseOscillationInterface from ..utils.default_values import default_values -from ..utils.global_variables import BASE_OSCILLATION_NAMES, BASE_OSCILLATIONS, PARAMETERS +from ..utils.global_variables import ( + BASE_OSCILLATION_NAMES, + BASE_OSCILLATIONS, + PARAMETERS, +) from ..utils.types import BOGenerationContext @@ -30,13 +34,19 @@ def get_base_oscillation_kind(self) -> str: def get_timeseries_periods(self) -> Optional[int]: return None - def generate_only_base(self, ctx: BOGenerationContext, *args, **kwargs) -> np.ndarray: + def generate_only_base( + self, ctx: BOGenerationContext, *args, **kwargs + ) -> np.ndarray: c = ctx.previous_channels if ctx.previous_channels else [] return formula(c, self.formula) -def formula(previous_channels: List[np.ndarray] = (), - formula_dict: Dict[str, Any] = default_values[BASE_OSCILLATIONS][PARAMETERS.FORMULA]) -> np.ndarray: +def formula( + previous_channels: List[np.ndarray] = (), + formula_dict: Dict[str, Any] = default_values[BASE_OSCILLATIONS][ + PARAMETERS.FORMULA + ], +) -> np.ndarray: return FormulaParser(formula_dict).parse(previous_channels).execute() @@ -88,11 +98,10 @@ def from_dict(d: Dict, prev_channels: List[np.ndarray]) -> Operation: elif type(operand) == dict: operand = FormulaObj.from_dict(operand, prev_channels) else: - raise ValueError("The Operand in Operation has to be either `float` or an `object`") - return Operation( - kind=kind, - operand=operand - ) + raise ValueError( + "The Operand in Operation has to be either `float` or an `object`" + ) + return Operation(kind=kind, operand=operand) class Aggregation(NamedTuple): @@ -106,10 +115,7 @@ def execute(self, base: np.ndarray) -> np.ndarray: def from_dict(d: Dict) -> Aggregation: kind = AggregationType(d.get(KIND)) axis = d.get(AXIS, None) - return Aggregation( - kind=kind, - axis=axis - ) + return Aggregation(kind=kind, axis=axis) class FormulaObj(NamedTuple): @@ -138,7 +144,9 @@ def from_dict(d: Dict, prev_channels: List[np.ndarray]) -> FormulaObj: base = d.get(BASE) operation = d.get(OPERATION, None) aggregation = d.get(AGGREGATION, None) - assert operation is None or aggregation is None, "Only one `operation` or `aggregation` can be set, not both!" + assert ( + operation is None or aggregation is None + ), "Only one `operation` or `aggregation` can be set, not both!" 
if type(base) == dict: base = FormulaObj.from_dict(base, prev_channels=prev_channels) @@ -150,7 +158,7 @@ def from_dict(d: Dict, prev_channels: List[np.ndarray]) -> FormulaObj: return FormulaObj( base=base, prev_channels=prev_channels, - operation=Operation.from_dict(operation, prev_channels) + operation=Operation.from_dict(operation, prev_channels), ) else: raise ValueError("The Operation has to be an `object`.") @@ -159,7 +167,7 @@ def from_dict(d: Dict, prev_channels: List[np.ndarray]) -> FormulaObj: return FormulaObj( base=base, prev_channels=prev_channels, - aggregation=Aggregation.from_dict(aggregation) + aggregation=Aggregation.from_dict(aggregation), ) else: raise ValueError("The Aggregation has to be an `object`.") diff --git a/gutenTAG/base_oscillations/interface.py b/gutenTAG/base_oscillations/interface.py index a520ef7..ad79655 100644 --- a/gutenTAG/base_oscillations/interface.py +++ b/gutenTAG/base_oscillations/interface.py @@ -11,38 +11,104 @@ class BaseOscillationInterface(ABC): def __init__(self, *args, **kwargs): # parameters - self.length = kwargs.get(PARAMETERS.LENGTH, default_values[BASE_OSCILLATIONS][PARAMETERS.LENGTH]) - self.frequency = kwargs.get(PARAMETERS.FREQUENCY, default_values[BASE_OSCILLATIONS][PARAMETERS.FREQUENCY]) - self.amplitude = kwargs.get(PARAMETERS.AMPLITUDE, default_values[BASE_OSCILLATIONS][PARAMETERS.AMPLITUDE]) - self.variance = kwargs.get(PARAMETERS.VARIANCE, default_values[BASE_OSCILLATIONS][PARAMETERS.VARIANCE]) - self.avg_pattern_length = kwargs.get(PARAMETERS.AVG_PATTERN_LENGTH, default_values[BASE_OSCILLATIONS][PARAMETERS.AVG_PATTERN_LENGTH]) - self.variance_pattern_length = kwargs.get(PARAMETERS.VARIANCE_PATTERN_LENGTH, default_values[BASE_OSCILLATIONS][PARAMETERS.VARIANCE_PATTERN_LENGTH]) - self.variance_amplitude = kwargs.get(PARAMETERS.VARIANCE_AMPLITUDE, default_values[BASE_OSCILLATIONS][PARAMETERS.VARIANCE_AMPLITUDE]) - self.freq_mod = kwargs.get(PARAMETERS.FREQ_MOD, default_values[BASE_OSCILLATIONS][PARAMETERS.FREQ_MOD]) - self.polynomial = kwargs.get(PARAMETERS.POLYNOMIAL, default_values[BASE_OSCILLATIONS][PARAMETERS.POLYNOMIAL]) - self.trend: Optional[BaseOscillationInterface] = kwargs.get(PARAMETERS.TREND, default_values[BASE_OSCILLATIONS][PARAMETERS.TREND]) - self.smoothing = kwargs.get(PARAMETERS.SMOOTHING, default_values[BASE_OSCILLATIONS][PARAMETERS.SMOOTHING]) - self.channel_diff = kwargs.get(PARAMETERS.CHANNEL_DIFF, default_values[BASE_OSCILLATIONS][PARAMETERS.CHANNEL_DIFF]) + self.length = kwargs.get( + PARAMETERS.LENGTH, default_values[BASE_OSCILLATIONS][PARAMETERS.LENGTH] + ) + self.frequency = kwargs.get( + PARAMETERS.FREQUENCY, + default_values[BASE_OSCILLATIONS][PARAMETERS.FREQUENCY], + ) + self.amplitude = kwargs.get( + PARAMETERS.AMPLITUDE, + default_values[BASE_OSCILLATIONS][PARAMETERS.AMPLITUDE], + ) + self.variance = kwargs.get( + PARAMETERS.VARIANCE, default_values[BASE_OSCILLATIONS][PARAMETERS.VARIANCE] + ) + self.avg_pattern_length = kwargs.get( + PARAMETERS.AVG_PATTERN_LENGTH, + default_values[BASE_OSCILLATIONS][PARAMETERS.AVG_PATTERN_LENGTH], + ) + self.variance_pattern_length = kwargs.get( + PARAMETERS.VARIANCE_PATTERN_LENGTH, + default_values[BASE_OSCILLATIONS][PARAMETERS.VARIANCE_PATTERN_LENGTH], + ) + self.variance_amplitude = kwargs.get( + PARAMETERS.VARIANCE_AMPLITUDE, + default_values[BASE_OSCILLATIONS][PARAMETERS.VARIANCE_AMPLITUDE], + ) + self.freq_mod = kwargs.get( + PARAMETERS.FREQ_MOD, default_values[BASE_OSCILLATIONS][PARAMETERS.FREQ_MOD] + ) + self.polynomial = kwargs.get( + 
PARAMETERS.POLYNOMIAL, + default_values[BASE_OSCILLATIONS][PARAMETERS.POLYNOMIAL], + ) + self.trend: Optional[BaseOscillationInterface] = kwargs.get( + PARAMETERS.TREND, default_values[BASE_OSCILLATIONS][PARAMETERS.TREND] + ) + self.smoothing = kwargs.get( + PARAMETERS.SMOOTHING, + default_values[BASE_OSCILLATIONS][PARAMETERS.SMOOTHING], + ) + self.channel_diff = kwargs.get( + PARAMETERS.CHANNEL_DIFF, + default_values[BASE_OSCILLATIONS][PARAMETERS.CHANNEL_DIFF], + ) self.channel_offset = kwargs.get(PARAMETERS.CHANNEL_OFFSET, self.amplitude) - self.random_seed = kwargs.get(PARAMETERS.RANDOM_SEED, default_values[BASE_OSCILLATIONS][PARAMETERS.RANDOM_SEED]) - self.formula = kwargs.get(PARAMETERS.FORMULA, default_values[BASE_OSCILLATIONS][PARAMETERS.FORMULA]) - self.ecg_sim_method = kwargs.get(PARAMETERS.ECG_SIM_METHOD, default_values[BASE_OSCILLATIONS][PARAMETERS.ECG_SIM_METHOD]) - self.width = kwargs.get(PARAMETERS.WIDTH, default_values[BASE_OSCILLATIONS][PARAMETERS.WIDTH]) - self.duty = kwargs.get(PARAMETERS.DUTY, default_values[BASE_OSCILLATIONS][PARAMETERS.DUTY]) - self.periodicity = kwargs.get(PARAMETERS.PERIODICITY, default_values[BASE_OSCILLATIONS][PARAMETERS.PERIODICITY]) - self.complexity = kwargs.get(PARAMETERS.COMPLEXITY, default_values[BASE_OSCILLATIONS][PARAMETERS.COMPLEXITY]) - self.input_timeseries_path_train = kwargs.get(PARAMETERS.INPUT_TIMESERIES_PATH_TRAIN, default_values[BASE_OSCILLATIONS][PARAMETERS.INPUT_TIMESERIES_PATH_TRAIN]) - self.input_timeseries_path_test = kwargs.get(PARAMETERS.INPUT_TIMESERIES_PATH_TEST, default_values[BASE_OSCILLATIONS][PARAMETERS.INPUT_TIMESERIES_PATH_TEST]) - self.use_column_train = kwargs.get(PARAMETERS.USE_COLUMN_TRAIN, default_values[BASE_OSCILLATIONS][PARAMETERS.USE_COLUMN_TRAIN]) - self.use_column_test = kwargs.get(PARAMETERS.USE_COLUMN_TEST, default_values[BASE_OSCILLATIONS][PARAMETERS.USE_COLUMN_TEST]) + self.random_seed = kwargs.get( + PARAMETERS.RANDOM_SEED, + default_values[BASE_OSCILLATIONS][PARAMETERS.RANDOM_SEED], + ) + self.formula = kwargs.get( + PARAMETERS.FORMULA, default_values[BASE_OSCILLATIONS][PARAMETERS.FORMULA] + ) + self.ecg_sim_method = kwargs.get( + PARAMETERS.ECG_SIM_METHOD, + default_values[BASE_OSCILLATIONS][PARAMETERS.ECG_SIM_METHOD], + ) + self.width = kwargs.get( + PARAMETERS.WIDTH, default_values[BASE_OSCILLATIONS][PARAMETERS.WIDTH] + ) + self.duty = kwargs.get( + PARAMETERS.DUTY, default_values[BASE_OSCILLATIONS][PARAMETERS.DUTY] + ) + self.periodicity = kwargs.get( + PARAMETERS.PERIODICITY, + default_values[BASE_OSCILLATIONS][PARAMETERS.PERIODICITY], + ) + self.complexity = kwargs.get( + PARAMETERS.COMPLEXITY, + default_values[BASE_OSCILLATIONS][PARAMETERS.COMPLEXITY], + ) + self.input_timeseries_path_train = kwargs.get( + PARAMETERS.INPUT_TIMESERIES_PATH_TRAIN, + default_values[BASE_OSCILLATIONS][PARAMETERS.INPUT_TIMESERIES_PATH_TRAIN], + ) + self.input_timeseries_path_test = kwargs.get( + PARAMETERS.INPUT_TIMESERIES_PATH_TEST, + default_values[BASE_OSCILLATIONS][PARAMETERS.INPUT_TIMESERIES_PATH_TEST], + ) + self.use_column_train = kwargs.get( + PARAMETERS.USE_COLUMN_TRAIN, + default_values[BASE_OSCILLATIONS][PARAMETERS.USE_COLUMN_TRAIN], + ) + self.use_column_test = kwargs.get( + PARAMETERS.USE_COLUMN_TEST, + default_values[BASE_OSCILLATIONS][PARAMETERS.USE_COLUMN_TEST], + ) # BO components self.timeseries: Optional[np.ndarray] = None self.noise: Optional[np.ndarray] = None self.trend_series: Optional[np.ndarray] = None - self.offset = kwargs.get(PARAMETERS.OFFSET, 
default_values[BASE_OSCILLATIONS][PARAMETERS.OFFSET]) + self.offset = kwargs.get( + PARAMETERS.OFFSET, default_values[BASE_OSCILLATIONS][PARAMETERS.OFFSET] + ) - def generate_noise(self, ctx: BOGenerationContext, variance: float, length: int) -> np.ndarray: + def generate_noise( + self, ctx: BOGenerationContext, variance: float, length: int + ) -> np.ndarray: return ctx.rng.normal(0, variance, length) def _generate_trend(self, ctx: BOGenerationContext) -> np.ndarray: @@ -59,7 +125,9 @@ def _generate_trend(self, ctx: BOGenerationContext) -> np.ndarray: def generate_timeseries_and_variations(self, ctx: BOGenerationContext, **kwargs): self.timeseries = self.generate_only_base(ctx, **kwargs) self.trend_series = self._generate_trend(ctx.to_trend()) - self.noise = self.generate_noise(ctx, self.variance * self.amplitude, self.length) + self.noise = self.generate_noise( + ctx, self.variance * self.amplitude, self.length + ) def is_periodic(self) -> bool: periods = self.get_timeseries_periods() @@ -88,7 +156,9 @@ def get_base_oscillation_kind(self) -> str: raise NotImplementedError() @abstractmethod - def generate_only_base(self, ctx: BOGenerationContext, *args, **kwargs) -> np.ndarray: + def generate_only_base( + self, ctx: BOGenerationContext, *args, **kwargs + ) -> np.ndarray: raise NotImplementedError() @classmethod diff --git a/gutenTAG/base_oscillations/mls.py b/gutenTAG/base_oscillations/mls.py index 9e33f66..856ebd9 100644 --- a/gutenTAG/base_oscillations/mls.py +++ b/gutenTAG/base_oscillations/mls.py @@ -9,7 +9,11 @@ from .interface import BaseOscillationInterface from .utils.math_func_support import SAMPLING_F from ..utils.default_values import default_values -from ..utils.global_variables import BASE_OSCILLATION_NAMES, BASE_OSCILLATIONS, PARAMETERS +from ..utils.global_variables import ( + BASE_OSCILLATION_NAMES, + BASE_OSCILLATIONS, + PARAMETERS, +) from ..utils.types import BOGenerationContext @@ -20,18 +24,21 @@ def get_base_oscillation_kind(self) -> str: return self.KIND def get_timeseries_periods(self) -> Optional[int]: - return self.length // (2 ** self.complexity - 1) + return self.length // (2**self.complexity - 1) def get_period_size(self) -> Optional[int]: - return 2 ** self.complexity - 1 + return 2**self.complexity - 1 - def generate_only_base(self, - ctx: BOGenerationContext, - length: Optional[int] = None, - amplitude: Optional[float] = None, - smoothing: Optional[float] = None, - complexity: Optional[int] = None, - *args, **kwargs) -> np.ndarray: + def generate_only_base( + self, + ctx: BOGenerationContext, + length: Optional[int] = None, + amplitude: Optional[float] = None, + smoothing: Optional[float] = None, + complexity: Optional[int] = None, + *args, + **kwargs + ) -> np.ndarray: n: int = length or self.length # in points a: float = amplitude or self.amplitude v_smoothing: float = smoothing or self.smoothing @@ -40,15 +47,19 @@ def generate_only_base(self, return mls(ctx.rng, n, a, v_smoothing, v_complexity) -def mls(rng: np.random.Generator = np.random.default_rng(), - length: int = default_values[BASE_OSCILLATIONS][PARAMETERS.LENGTH], - amplitude: float = default_values[BASE_OSCILLATIONS][PARAMETERS.AMPLITUDE], - smoothing: float = default_values[BASE_OSCILLATIONS][PARAMETERS.SMOOTHING], - complexity: int = default_values[BASE_OSCILLATIONS][PARAMETERS.COMPLEXITY]) -> np.ndarray: +def mls( + rng: np.random.Generator = np.random.default_rng(), + length: int = default_values[BASE_OSCILLATIONS][PARAMETERS.LENGTH], + amplitude: float = 
default_values[BASE_OSCILLATIONS][PARAMETERS.AMPLITUDE], + smoothing: float = default_values[BASE_OSCILLATIONS][PARAMETERS.SMOOTHING], + complexity: int = default_values[BASE_OSCILLATIONS][PARAMETERS.COMPLEXITY], +) -> np.ndarray: assert 1 < complexity < 16, "Complexity should be between 1 and 16 inclusive!" taps = rng.integers(1, complexity, endpoint=True, size=rng.integers(1, 3)) - state = np.r_[1, rng.integers(0, 1, endpoint=True, size=complexity - 1, dtype=np.int8)] + state = np.r_[ + 1, rng.integers(0, 1, endpoint=True, size=complexity - 1, dtype=np.int8) + ] if smoothing is not None and smoothing > 0: filter_size = int(smoothing * SAMPLING_F) new_n = length + filter_size - 1 @@ -63,7 +74,11 @@ def mls(rng: np.random.Generator = np.random.default_rng(), data = data.cumsum() data = np.tile(data, (length // data.shape[0]) + 1)[:length] - data = MinMaxScaler((-amplitude, amplitude)).fit_transform(data.reshape(-1, 1)).reshape(-1) + data = ( + MinMaxScaler((-amplitude, amplitude)) + .fit_transform(data.reshape(-1, 1)) + .reshape(-1) + ) return data diff --git a/gutenTAG/base_oscillations/polynomial.py b/gutenTAG/base_oscillations/polynomial.py index 23050d1..4a4aeab 100644 --- a/gutenTAG/base_oscillations/polynomial.py +++ b/gutenTAG/base_oscillations/polynomial.py @@ -5,7 +5,11 @@ from . import BaseOscillation from .interface import BaseOscillationInterface from ..utils.default_values import default_values -from ..utils.global_variables import BASE_OSCILLATION_NAMES, BASE_OSCILLATIONS, PARAMETERS +from ..utils.global_variables import ( + BASE_OSCILLATION_NAMES, + BASE_OSCILLATIONS, + PARAMETERS, +) from ..utils.types import BOGenerationContext @@ -18,19 +22,24 @@ def get_base_oscillation_kind(self) -> str: def get_timeseries_periods(self) -> Optional[int]: return None - def generate_only_base(self, - ctx: BOGenerationContext, - length: Optional[int] = None, - polynom: Optional[List[float]] = None, - *args, **kwargs) -> np.ndarray: + def generate_only_base( + self, + ctx: BOGenerationContext, + length: Optional[int] = None, + polynom: Optional[List[float]] = None, + *args, + **kwargs + ) -> np.ndarray: length = length or self.length polynom = polynom or self.polynomial return polynomial(length, polynom) -def polynomial(length: int = default_values[BASE_OSCILLATIONS][PARAMETERS.LENGTH], - polynomial: List[float] = default_values[BASE_OSCILLATIONS][PARAMETERS.POLYNOMIAL]) -> np.ndarray: +def polynomial( + length: int = default_values[BASE_OSCILLATIONS][PARAMETERS.LENGTH], + polynomial: List[float] = default_values[BASE_OSCILLATIONS][PARAMETERS.POLYNOMIAL], +) -> np.ndarray: return np.polynomial.Polynomial(polynomial)(np.arange(length)) diff --git a/gutenTAG/base_oscillations/random_mode_jump.py b/gutenTAG/base_oscillations/random_mode_jump.py index 2c62e23..029832e 100644 --- a/gutenTAG/base_oscillations/random_mode_jump.py +++ b/gutenTAG/base_oscillations/random_mode_jump.py @@ -5,7 +5,11 @@ from . 
import BaseOscillation from .interface import BaseOscillationInterface from ..utils.default_values import default_values -from ..utils.global_variables import BASE_OSCILLATION_NAMES, BASE_OSCILLATIONS, PARAMETERS +from ..utils.global_variables import ( + BASE_OSCILLATION_NAMES, + BASE_OSCILLATIONS, + PARAMETERS, +) from ..utils.types import BOGenerationContext @@ -25,47 +29,66 @@ def is_periodic(self) -> bool: """RandomModeJump has reoccurring modes but no fixed periodicity!""" return False - def generate_only_base(self, - ctx: BOGenerationContext, - length: Optional[int] = None, - frequency: Optional[float] = None, - channel_diff: Optional[float] = None, - channel_offset: Optional[float] = None, - random_seed: Optional[int] = None, - *args, **kwargs) -> np.ndarray: + def generate_only_base( + self, + ctx: BOGenerationContext, + length: Optional[int] = None, + frequency: Optional[float] = None, + channel_diff: Optional[float] = None, + channel_offset: Optional[float] = None, + random_seed: Optional[int] = None, + *args, + **kwargs + ) -> np.ndarray: length = length or self.length frequency = frequency or self.frequency channel_diff = channel_diff or self.channel_diff channel_offset = channel_offset or self.channel_offset random_seed = random_seed or self.random_seed - return random_mode_jump(ctx, length, frequency, channel_diff, channel_offset, random_seed) + return random_mode_jump( + ctx, length, frequency, channel_diff, channel_offset, random_seed + ) -def _generate_random_steps(ctx: BOGenerationContext, length: int, step_length: int, - random_seed: Optional[int] = None) -> np.ndarray: - rng = np.random.default_rng(BOGenerationContext.re_seed(random_seed, base_seed=ctx.seed)) +def _generate_random_steps( + ctx: BOGenerationContext, + length: int, + step_length: int, + random_seed: Optional[int] = None, +) -> np.ndarray: + rng = np.random.default_rng( + BOGenerationContext.re_seed(random_seed, base_seed=ctx.seed) + ) n_steps = int(np.ceil(length / step_length).item()) energy = rng.choice([-1, 1], size=n_steps) base = np.repeat(energy, step_length)[:length].astype(np.float64) return base -def _generate_channel_amplitude(channel: int, channel_diff: float, channel_offset: float) -> float: +def _generate_channel_amplitude( + channel: int, channel_diff: float, channel_offset: float +) -> float: high_val = (channel_diff * channel) + channel_offset return high_val -def random_mode_jump(ctx: BOGenerationContext = BOGenerationContext.default(), - length: int = default_values[BASE_OSCILLATIONS][PARAMETERS.LENGTH], - frequency: float = default_values[BASE_OSCILLATIONS][PARAMETERS.FREQUENCY], - channel_diff: float = default_values[BASE_OSCILLATIONS][PARAMETERS.CHANNEL_DIFF], - channel_offset: float = default_values[BASE_OSCILLATIONS][PARAMETERS.CHANNEL_OFFSET], - random_seed: Optional[int] = None) -> np.ndarray: +def random_mode_jump( + ctx: BOGenerationContext = BOGenerationContext.default(), + length: int = default_values[BASE_OSCILLATIONS][PARAMETERS.LENGTH], + frequency: float = default_values[BASE_OSCILLATIONS][PARAMETERS.FREQUENCY], + channel_diff: float = default_values[BASE_OSCILLATIONS][PARAMETERS.CHANNEL_DIFF], + channel_offset: float = default_values[BASE_OSCILLATIONS][ + PARAMETERS.CHANNEL_OFFSET + ], + random_seed: Optional[int] = None, +) -> np.ndarray: step_length = int(length // frequency) base_random_steps = _generate_random_steps(ctx, length, step_length, random_seed) - channel_amplitude = _generate_channel_amplitude(ctx.channel, channel_diff, channel_offset) + channel_amplitude = 
_generate_channel_amplitude( + ctx.channel, channel_diff, channel_offset + ) ts = base_random_steps * channel_amplitude return ts diff --git a/gutenTAG/base_oscillations/random_walk.py b/gutenTAG/base_oscillations/random_walk.py index 9cedd38..0ac8252 100644 --- a/gutenTAG/base_oscillations/random_walk.py +++ b/gutenTAG/base_oscillations/random_walk.py @@ -7,7 +7,11 @@ from . import BaseOscillation from .interface import BaseOscillationInterface from ..utils.default_values import default_values -from ..utils.global_variables import BASE_OSCILLATION_NAMES, BASE_OSCILLATIONS, PARAMETERS +from ..utils.global_variables import ( + BASE_OSCILLATION_NAMES, + BASE_OSCILLATIONS, + PARAMETERS, +) from ..utils.types import BOGenerationContext @@ -20,12 +24,15 @@ def get_base_oscillation_kind(self) -> str: def get_timeseries_periods(self) -> Optional[int]: return None - def generate_only_base(self, - ctx: BOGenerationContext, - length: Optional[int] = None, - amplitude: Optional[float] = None, - smoothing: Optional[float] = None, - *args, **kwargs) -> np.ndarray: + def generate_only_base( + self, + ctx: BOGenerationContext, + length: Optional[int] = None, + amplitude: Optional[float] = None, + smoothing: Optional[float] = None, + *args, + **kwargs + ) -> np.ndarray: length = length or self.length amplitude = amplitude or self.amplitude smoothing = smoothing or self.smoothing @@ -34,14 +41,16 @@ def generate_only_base(self, def _gen_steps(rng: np.random.Generator, length: int) -> np.ndarray: - steps = rng.choice([-1., 0., 1.], size=length - 1) + steps = rng.choice([-1.0, 0.0, 1.0], size=length - 1) return np.r_[0, steps].cumsum() -def random_walk(rng: np.random.Generator = np.random.default_rng(), - length: int = default_values[BASE_OSCILLATIONS][PARAMETERS.LENGTH], - amplitude: float = default_values[BASE_OSCILLATIONS][PARAMETERS.AMPLITUDE], - smoothing: float = default_values[BASE_OSCILLATIONS][PARAMETERS.SMOOTHING]) -> np.ndarray: +def random_walk( + rng: np.random.Generator = np.random.default_rng(), + length: int = default_values[BASE_OSCILLATIONS][PARAMETERS.LENGTH], + amplitude: float = default_values[BASE_OSCILLATIONS][PARAMETERS.AMPLITUDE], + smoothing: float = default_values[BASE_OSCILLATIONS][PARAMETERS.SMOOTHING], +) -> np.ndarray: if smoothing: filter_size = int(smoothing * length) ts = _gen_steps(rng, length + filter_size - 1) @@ -51,7 +60,11 @@ def random_walk(rng: np.random.Generator = np.random.default_rng(), else: ts = _gen_steps(rng, length) - return MinMaxScaler(feature_range=(-amplitude, amplitude)).fit_transform(ts.reshape(-1, 1)).reshape(-1) + return ( + MinMaxScaler(feature_range=(-amplitude, amplitude)) + .fit_transform(ts.reshape(-1, 1)) + .reshape(-1) + ) -BaseOscillation.register(RandomWalk.KIND, RandomWalk) \ No newline at end of file +BaseOscillation.register(RandomWalk.KIND, RandomWalk) diff --git a/gutenTAG/base_oscillations/sawtooth.py b/gutenTAG/base_oscillations/sawtooth.py index 85d3de5..8422c32 100644 --- a/gutenTAG/base_oscillations/sawtooth.py +++ b/gutenTAG/base_oscillations/sawtooth.py @@ -6,9 +6,17 @@ from . 
import BaseOscillation from .interface import BaseOscillationInterface -from .utils.math_func_support import prepare_base_signal, generate_periodic_signal, calc_n_periods +from .utils.math_func_support import ( + prepare_base_signal, + generate_periodic_signal, + calc_n_periods, +) from ..utils.default_values import default_values -from ..utils.global_variables import BASE_OSCILLATION_NAMES, BASE_OSCILLATIONS, PARAMETERS +from ..utils.global_variables import ( + BASE_OSCILLATION_NAMES, + BASE_OSCILLATIONS, + PARAMETERS, +) from ..utils.types import BOGenerationContext @@ -21,14 +29,17 @@ def get_base_oscillation_kind(self) -> str: def get_timeseries_periods(self) -> Optional[int]: return calc_n_periods(self.length, self.frequency) - def generate_only_base(self, - ctx: BOGenerationContext, - length: Optional[int] = None, - frequency: Optional[float] = None, - amplitude: Optional[float] = None, - freq_mod: Optional[float] = None, - width: Optional[float] = None, - *args, **kwargs) -> np.ndarray: + def generate_only_base( + self, + ctx: BOGenerationContext, + length: Optional[int] = None, + frequency: Optional[float] = None, + amplitude: Optional[float] = None, + freq_mod: Optional[float] = None, + width: Optional[float] = None, + *args, + **kwargs + ) -> np.ndarray: n: int = length or self.length # in points f: float = frequency or self.frequency # in Hz a: float = amplitude or self.amplitude @@ -38,11 +49,13 @@ def generate_only_base(self, return sawtooth(n, f, a, v_freq_mod, v_width) -def sawtooth(length: int = default_values[BASE_OSCILLATIONS][PARAMETERS.LENGTH], - frequency: float = default_values[BASE_OSCILLATIONS][PARAMETERS.FREQUENCY], - amplitude: float = default_values[BASE_OSCILLATIONS][PARAMETERS.AMPLITUDE], - freq_mod: float = default_values[BASE_OSCILLATIONS][PARAMETERS.FREQ_MOD], - width: float = default_values[BASE_OSCILLATIONS][PARAMETERS.WIDTH]) -> np.ndarray: +def sawtooth( + length: int = default_values[BASE_OSCILLATIONS][PARAMETERS.LENGTH], + frequency: float = default_values[BASE_OSCILLATIONS][PARAMETERS.FREQUENCY], + amplitude: float = default_values[BASE_OSCILLATIONS][PARAMETERS.AMPLITUDE], + freq_mod: float = default_values[BASE_OSCILLATIONS][PARAMETERS.FREQ_MOD], + width: float = default_values[BASE_OSCILLATIONS][PARAMETERS.WIDTH], +) -> np.ndarray: base_ts = prepare_base_signal(length, frequency) func = partial(signal.sawtooth, width=width) return generate_periodic_signal(base_ts, func, amplitude, freq_mod) diff --git a/gutenTAG/base_oscillations/sine.py b/gutenTAG/base_oscillations/sine.py index 7fc45e6..2dd8423 100644 --- a/gutenTAG/base_oscillations/sine.py +++ b/gutenTAG/base_oscillations/sine.py @@ -4,9 +4,17 @@ from . 
import BaseOscillation from .interface import BaseOscillationInterface -from .utils.math_func_support import prepare_base_signal, generate_periodic_signal, calc_n_periods +from .utils.math_func_support import ( + prepare_base_signal, + generate_periodic_signal, + calc_n_periods, +) from ..utils.default_values import default_values -from ..utils.global_variables import BASE_OSCILLATION_NAMES, BASE_OSCILLATIONS, PARAMETERS +from ..utils.global_variables import ( + BASE_OSCILLATION_NAMES, + BASE_OSCILLATIONS, + PARAMETERS, +) from ..utils.types import BOGenerationContext @@ -19,13 +27,16 @@ def get_base_oscillation_kind(self) -> str: def get_timeseries_periods(self) -> Optional[int]: return calc_n_periods(self.length, self.frequency) - def generate_only_base(self, - ctx: BOGenerationContext, - length: Optional[int] = None, - frequency: Optional[float] = None, - amplitude: Optional[float] = None, - freq_mod: Optional[float] = None, - *args, **kwargs) -> np.ndarray: + def generate_only_base( + self, + ctx: BOGenerationContext, + length: Optional[int] = None, + frequency: Optional[float] = None, + amplitude: Optional[float] = None, + freq_mod: Optional[float] = None, + *args, + **kwargs + ) -> np.ndarray: n: int = length or self.length # in points f: float = frequency or self.frequency # in Hz a: float = amplitude or self.amplitude @@ -34,10 +45,12 @@ def generate_only_base(self, return sine(n, f, a, v_freq_mod) -def sine(length: int = default_values[BASE_OSCILLATIONS][PARAMETERS.LENGTH], - frequency: float = default_values[BASE_OSCILLATIONS][PARAMETERS.FREQUENCY], - amplitude: float = default_values[BASE_OSCILLATIONS][PARAMETERS.AMPLITUDE], - freq_mod: float = default_values[BASE_OSCILLATIONS][PARAMETERS.FREQ_MOD]) -> np.ndarray: +def sine( + length: int = default_values[BASE_OSCILLATIONS][PARAMETERS.LENGTH], + frequency: float = default_values[BASE_OSCILLATIONS][PARAMETERS.FREQUENCY], + amplitude: float = default_values[BASE_OSCILLATIONS][PARAMETERS.AMPLITUDE], + freq_mod: float = default_values[BASE_OSCILLATIONS][PARAMETERS.FREQ_MOD], +) -> np.ndarray: base_ts = prepare_base_signal(length, frequency) return generate_periodic_signal(base_ts, np.sin, amplitude, freq_mod) diff --git a/gutenTAG/base_oscillations/square.py b/gutenTAG/base_oscillations/square.py index d0a95d2..d1efdcb 100644 --- a/gutenTAG/base_oscillations/square.py +++ b/gutenTAG/base_oscillations/square.py @@ -6,9 +6,17 @@ from . 
import BaseOscillation from .interface import BaseOscillationInterface -from .utils.math_func_support import prepare_base_signal, generate_periodic_signal, calc_n_periods +from .utils.math_func_support import ( + prepare_base_signal, + generate_periodic_signal, + calc_n_periods, +) from ..utils.default_values import default_values -from ..utils.global_variables import BASE_OSCILLATION_NAMES, PARAMETERS, BASE_OSCILLATIONS +from ..utils.global_variables import ( + BASE_OSCILLATION_NAMES, + PARAMETERS, + BASE_OSCILLATIONS, +) from ..utils.types import BOGenerationContext @@ -21,14 +29,17 @@ def get_base_oscillation_kind(self) -> str: def get_timeseries_periods(self) -> Optional[int]: return calc_n_periods(self.length, self.frequency) - def generate_only_base(self, - ctx: BOGenerationContext, - length: Optional[int] = None, - frequency: Optional[float] = None, - amplitude: Optional[float] = None, - freq_mod: Optional[float] = None, - duty: Optional[float] = None, - *args, **kwargs) -> np.ndarray: + def generate_only_base( + self, + ctx: BOGenerationContext, + length: Optional[int] = None, + frequency: Optional[float] = None, + amplitude: Optional[float] = None, + freq_mod: Optional[float] = None, + duty: Optional[float] = None, + *args, + **kwargs + ) -> np.ndarray: n: int = length or self.length # in points f: float = frequency or self.frequency # in Hz a: float = amplitude or self.amplitude @@ -38,11 +49,13 @@ def generate_only_base(self, return square(n, f, a, v_freq_mod, v_duty) -def square(length: int = default_values[BASE_OSCILLATIONS][PARAMETERS.LENGTH], - frequency: float = default_values[BASE_OSCILLATIONS][PARAMETERS.FREQUENCY], - amplitude: float = default_values[BASE_OSCILLATIONS][PARAMETERS.AMPLITUDE], - freq_mod: float = default_values[BASE_OSCILLATIONS][PARAMETERS.FREQ_MOD], - duty: float = default_values[BASE_OSCILLATIONS][PARAMETERS.DUTY]) -> np.ndarray: +def square( + length: int = default_values[BASE_OSCILLATIONS][PARAMETERS.LENGTH], + frequency: float = default_values[BASE_OSCILLATIONS][PARAMETERS.FREQUENCY], + amplitude: float = default_values[BASE_OSCILLATIONS][PARAMETERS.AMPLITUDE], + freq_mod: float = default_values[BASE_OSCILLATIONS][PARAMETERS.FREQ_MOD], + duty: float = default_values[BASE_OSCILLATIONS][PARAMETERS.DUTY], +) -> np.ndarray: base_ts = prepare_base_signal(length, frequency) func = partial(signal.square, duty=duty) return generate_periodic_signal(base_ts, func, amplitude, freq_mod) diff --git a/gutenTAG/base_oscillations/utils/math_func_support.py b/gutenTAG/base_oscillations/utils/math_func_support.py index aaf9629..c604173 100644 --- a/gutenTAG/base_oscillations/utils/math_func_support.py +++ b/gutenTAG/base_oscillations/utils/math_func_support.py @@ -44,7 +44,12 @@ def prepare_base_signal(n: int, f: float) -> np.ndarray: return base -def generate_periodic_signal(base: np.ndarray, func: Callable[[np.ndarray], np.ndarray], a: float, freq_mod: Optional[float] = None): +def generate_periodic_signal( + base: np.ndarray, + func: Callable[[np.ndarray], np.ndarray], + a: float, + freq_mod: Optional[float] = None, +): """Generates a periodic signal based on the base signal (should already contain the frequency) and applying the supplied mathematical function. ``freq_mod`` can be used to modulate the signal amplitude with another frequency. 
diff --git a/gutenTAG/config/parser.py b/gutenTAG/config/parser.py index 15e22ed..460fe61 100644 --- a/gutenTAG/config/parser.py +++ b/gutenTAG/config/parser.py @@ -9,7 +9,13 @@ from ..base_oscillations import BaseOscillationInterface, BaseOscillation from ..utils.compatibility import Compatibility from ..utils.default_values import default_values -from ..utils.global_variables import BASE_OSCILLATION, BASE_OSCILLATIONS, TIMESERIES, PARAMETERS, ANOMALIES +from ..utils.global_variables import ( + BASE_OSCILLATION, + BASE_OSCILLATIONS, + TIMESERIES, + PARAMETERS, + ANOMALIES, +) @dataclass @@ -25,18 +31,24 @@ def to_dict(self): def from_dict(d: Dict) -> GenerationOptions: return GenerationOptions( semi_supervised=d.get("semi-supervised", False), - supervised=d.get("supervised", False) + supervised=d.get("supervised", False), ) -ResultType = List[Tuple[List[BaseOscillationInterface], List[Anomaly], GenerationOptions, Dict]] +ResultType = List[ + Tuple[List[BaseOscillationInterface], List[Anomaly], GenerationOptions, Dict] +] -def decode_trend_obj(trend: Dict, length_overwrite: int) -> Optional[BaseOscillationInterface]: +def decode_trend_obj( + trend: Dict, length_overwrite: int +) -> Optional[BaseOscillationInterface]: trend_key = trend.get(PARAMETERS.KIND, None) trend[PARAMETERS.LENGTH] = length_overwrite if PARAMETERS.TREND in trend: - trend[PARAMETERS.TREND] = decode_trend_obj(trend[PARAMETERS.TREND], length_overwrite) + trend[PARAMETERS.TREND] = decode_trend_obj( + trend[PARAMETERS.TREND], length_overwrite + ) return BaseOscillation.from_key(trend_key, **trend) if trend_key else None @@ -70,29 +82,46 @@ def parse(self, config: Dict) -> ResultType: return self.result def _check_compatibility(self, ts: Dict) -> bool: - base_oscillations = ts.get(BASE_OSCILLATIONS, [ts.get(BASE_OSCILLATION)] * ts.get(PARAMETERS.CHANNELS, 0)) + base_oscillations = ts.get( + BASE_OSCILLATIONS, + [ts.get(BASE_OSCILLATION)] * ts.get(PARAMETERS.CHANNELS, 0), + ) anomalies = ts.get(ANOMALIES, []) for anomaly in anomalies: - base_oscillation = base_oscillations[anomaly.get(PARAMETERS.CHANNEL, default_values[ANOMALIES][PARAMETERS.CHANNEL])][PARAMETERS.KIND] + base_oscillation = base_oscillations[ + anomaly.get( + PARAMETERS.CHANNEL, default_values[ANOMALIES][PARAMETERS.CHANNEL] + ) + ][PARAMETERS.KIND] for anomaly_kind in anomaly.get(PARAMETERS.KINDS, []): anomaly_kind = anomaly_kind[PARAMETERS.KIND] if not Compatibility.check(anomaly_kind, base_oscillation): if self.skip_errors: - logging.warning(f"Skip generation of time series {ts.get('name', '')} due to incompatible types: {anomaly_kind} -> {base_oscillation}.") + logging.warning( + f"Skip generation of time series {ts.get('name', '')} due to incompatible types: {anomaly_kind} -> {base_oscillation}." + ) return False else: - raise ValueError(f"Incompatible types: {anomaly_kind} -> {base_oscillation}.") + raise ValueError( + f"Incompatible types: {anomaly_kind} -> {base_oscillation}." 
+ ) return True def _skip_name(self, name: str) -> bool: return self.only is not None and name != self.only def _build_base_oscillations(self, d: Dict) -> List[BaseOscillationInterface]: - length = d.get(PARAMETERS.LENGTH, default_values[BASE_OSCILLATIONS][PARAMETERS.LENGTH]) - bos = d.get(BASE_OSCILLATIONS, [d.get(BASE_OSCILLATION)] * d.get(PARAMETERS.CHANNELS, 0)) + length = d.get( + PARAMETERS.LENGTH, default_values[BASE_OSCILLATIONS][PARAMETERS.LENGTH] + ) + bos = d.get( + BASE_OSCILLATIONS, [d.get(BASE_OSCILLATION)] * d.get(PARAMETERS.CHANNELS, 0) + ) return [self._build_single_base_oscillation(bo, length) for bo in bos] - def _build_single_base_oscillation(self, d: Dict, length: int) -> BaseOscillationInterface: + def _build_single_base_oscillation( + self, d: Dict, length: int + ) -> BaseOscillationInterface: base_oscillation_config = deepcopy(d) base_oscillation_config[PARAMETERS.LENGTH] = length trend = base_oscillation_config.get(PARAMETERS.TREND, {}) @@ -101,15 +130,25 @@ def _build_single_base_oscillation(self, d: Dict, length: int) -> BaseOscillatio return BaseOscillation.from_key(key, **base_oscillation_config) def _build_anomalies(self, d: Dict) -> List[Anomaly]: - return [self._build_single_anomaly(anomaly_config) for anomaly_config in d.get(ANOMALIES, [])] + return [ + self._build_single_anomaly(anomaly_config) + for anomaly_config in d.get(ANOMALIES, []) + ] def _build_single_anomaly(self, d: Dict) -> Anomaly: anomaly = Anomaly( - Position(d.get(PARAMETERS.POSITION, default_values[ANOMALIES][PARAMETERS.POSITION])), + Position( + d.get( + PARAMETERS.POSITION, default_values[ANOMALIES][PARAMETERS.POSITION] + ) + ), d.get(PARAMETERS.EXACT_POSITION, None), d[PARAMETERS.LENGTH], d.get(PARAMETERS.CHANNEL, default_values[ANOMALIES][PARAMETERS.CHANNEL]), - d.get(PARAMETERS.CREEPING_LENGTH, default_values[ANOMALIES][PARAMETERS.CREEPING_LENGTH]) + d.get( + PARAMETERS.CREEPING_LENGTH, + default_values[ANOMALIES][PARAMETERS.CREEPING_LENGTH], + ), ) anomaly_kinds = self._build_anomaly_kinds(d, anomaly.anomaly_length) @@ -119,13 +158,18 @@ def _build_single_anomaly(self, d: Dict) -> Anomaly: return anomaly def _build_anomaly_kinds(self, d: Dict, length: int) -> List[BaseAnomaly]: - return [self._build_single_anomaly_kind(anomaly_kind, length) for anomaly_kind in d.get(PARAMETERS.KINDS, [])] + return [ + self._build_single_anomaly_kind(anomaly_kind, length) + for anomaly_kind in d.get(PARAMETERS.KINDS, []) + ] def _build_single_anomaly_kind(self, d: Dict, length: int) -> BaseAnomaly: kind = d[PARAMETERS.KIND] if kind == PARAMETERS.TREND: parameters = { - PARAMETERS.TREND: decode_trend_obj(deepcopy(d[PARAMETERS.OSCILLATION]), length) + PARAMETERS.TREND: decode_trend_obj( + deepcopy(d[PARAMETERS.OSCILLATION]), length + ) } else: parameters = deepcopy(d) @@ -135,6 +179,8 @@ def _build_single_anomaly_kind(self, d: Dict, length: int) -> BaseAnomaly: except TypeError as ex: if "unexpected keyword argument" in str(ex): parameter = str(ex).split("'")[-2] - raise ValueError(f"Anomaly kind '{kind}' does not support parameter '{parameter}'") from ex + raise ValueError( + f"Anomaly kind '{kind}' does not support parameter '{parameter}'" + ) from ex else: raise ex diff --git a/gutenTAG/config/schema_loader.py b/gutenTAG/config/schema_loader.py index 2c06982..e68f2ee 100644 --- a/gutenTAG/config/schema_loader.py +++ b/gutenTAG/config/schema_loader.py @@ -15,7 +15,6 @@ def load_schema_file(self, schema_id: str) -> Dict: class FileSystemConfigSchemaLoader(ConfigSchemaLoader): - def __init__(self, 
base_path: Path = CONFIG_SCHEMA.SCHEMA_FOLDER_PATH): super().__init__() self.base_path = base_path @@ -31,6 +30,7 @@ def from_packaged_schema() -> "ConfigSchemaLoader": current_path = Path(os.path.dirname(__file__)).absolute() return FileSystemConfigSchemaLoader(current_path / "schema") + # We could add a second method of loading the schema using the python package structure similar to # https://github.com/pallets/jinja/blob/c3a61d6ef654f389ea2bdeddce0ffc74d656be8b/src/jinja2/loaders.py#L238 diff --git a/gutenTAG/config/validator.py b/gutenTAG/config/validator.py index 6a4461d..8b13d5a 100644 --- a/gutenTAG/config/validator.py +++ b/gutenTAG/config/validator.py @@ -1,4 +1,4 @@ -from typing import Dict, Optional +from typing import Any, Dict, Optional import jsonschema from jsonschema import RefResolver @@ -6,8 +6,14 @@ from ..anomalies import AnomalyKind from ..base_oscillations import BaseOscillation from ..config.schema_loader import ConfigSchemaLoader, FileSystemConfigSchemaLoader -from ..utils.global_variables import CONFIG_SCHEMA, TIMESERIES, PARAMETERS, ANOMALIES, BASE_OSCILLATION, \ - BASE_OSCILLATIONS +from ..utils.global_variables import ( + CONFIG_SCHEMA, + TIMESERIES, + PARAMETERS, + ANOMALIES, + BASE_OSCILLATION, + BASE_OSCILLATIONS, +) class GutenTAGParseError(BaseException): @@ -17,7 +23,9 @@ def __init__(self, prefix: str = "", msg: Optional[str] = None): prefix = "" if prefix: prefix = f" {prefix}" - super(GutenTAGParseError, self).__init__(f"Error in generation configuration{prefix}: {msg}") + super(GutenTAGParseError, self).__init__( + f"Error in generation configuration{prefix}: {msg}" + ) class ConfigValidator: @@ -36,9 +44,7 @@ def __init__(self): # create resolver containing all schema parts self.base_schema = base_schema self.resolver = RefResolver( - base_uri=base_schema_name, - referrer=base_schema, - store=schema_parts + base_uri=base_schema_name, referrer=base_schema, store=schema_parts ) def validate(self, config: Dict) -> None: @@ -58,40 +64,67 @@ def gutentag_validate(config: Dict) -> None: raise GutenTAGParseError(log_prefix, f"Missing '{ANOMALIES}' property.") if BASE_OSCILLATION not in ts and BASE_OSCILLATIONS not in ts: - raise GutenTAGParseError(log_prefix, f"Missing '{BASE_OSCILLATIONS}' property.") + raise GutenTAGParseError( + log_prefix, f"Missing '{BASE_OSCILLATIONS}' property." + ) if BASE_OSCILLATION in ts and PARAMETERS.CHANNELS not in ts: raise GutenTAGParseError( log_prefix, - f"If a single '{BASE_OSCILLATION}' is defined, the property '{PARAMETERS.CHANNELS}' is required." 
+ f"If a single '{BASE_OSCILLATION}' is defined, the property '{PARAMETERS.CHANNELS}' is required.", ) # check base oscillations - bos = ts.get(BASE_OSCILLATIONS, [ts.get(BASE_OSCILLATION)] * ts.get(PARAMETERS.CHANNELS, 0)) + bos = ts.get( + BASE_OSCILLATIONS, + [ts.get(BASE_OSCILLATION)] * ts.get(PARAMETERS.CHANNELS, 0), + ) for i, bo in enumerate(bos): - log_prefix_bo = f"{log_prefix} BO {i}" - if PARAMETERS.KIND not in bo: - raise GutenTAGParseError(log_prefix_bo, f"Missing required property '{PARAMETERS.KIND}'.") - bo_kind = bo[PARAMETERS.KIND] - if bo_kind not in BaseOscillation.key_mapping: - raise GutenTAGParseError(log_prefix_bo, f"Base oscillation kind '{bo_kind}' is not supported!") + ConfigValidator._validate_bo(i, bo, log_prefix) # check anomaly definitions anoms = ts.get(ANOMALIES, []) for i, anom in enumerate(anoms): - log_prefix_anom = f"{log_prefix} Anom {i}" - if PARAMETERS.KINDS not in anom: - raise GutenTAGParseError(log_prefix_anom, f"Missing required property '{PARAMETERS.KINDS}'.") - if PARAMETERS.LENGTH not in anom: - raise GutenTAGParseError(log_prefix_anom, f"Missing required property '{PARAMETERS.LENGTH}'.") - - kinds = anom.get(PARAMETERS.KINDS, []) - for j, anom_kind in enumerate(kinds): - log_prefix_kind = f"{log_prefix_anom} Kind {j}" - if PARAMETERS.KIND not in anom_kind: - raise GutenTAGParseError(log_prefix_kind, f"Missing required property '{PARAMETERS.KIND}'.") - if not AnomalyKind.has_value(anom_kind[PARAMETERS.KIND]): - raise GutenTAGParseError( - log_prefix_kind, - f"Anomaly kind '{anom_kind[PARAMETERS.KIND]}' is not supported!" - ) + ConfigValidator._validate_anomaly(i, anom, log_prefix) + + @staticmethod + def _validate_bo(i: int, bo: Dict[str, Any], log_prefix: str) -> None: + log_prefix_bo = f"{log_prefix} BO {i}" + if PARAMETERS.KIND not in bo: + raise GutenTAGParseError( + log_prefix_bo, f"Missing required property '{PARAMETERS.KIND}'." 
+ ) + bo_kind = bo[PARAMETERS.KIND] + if bo_kind not in BaseOscillation.key_mapping: + raise GutenTAGParseError( + log_prefix_bo, + f"Base oscillation kind '{bo_kind}' is not supported!", + ) + + @staticmethod + def _validate_anomaly(i: int, anom: Dict[str, Any], log_prefix: str) -> None: + log_prefix_anom = f"{log_prefix} Anom {i}" + if PARAMETERS.KINDS not in anom: + raise GutenTAGParseError( + log_prefix_anom, + f"Missing required property '{PARAMETERS.KINDS}'.", + ) + if PARAMETERS.LENGTH not in anom: + raise GutenTAGParseError( + log_prefix_anom, + f"Missing required property '{PARAMETERS.LENGTH}'.", + ) + + kinds = anom.get(PARAMETERS.KINDS, []) + for j, anom_kind in enumerate(kinds): + log_prefix_kind = f"{log_prefix_anom} Kind {j}" + if PARAMETERS.KIND not in anom_kind: + raise GutenTAGParseError( + log_prefix_kind, + f"Missing required property '{PARAMETERS.KIND}'.", + ) + if not AnomalyKind.has_value(anom_kind[PARAMETERS.KIND]): + raise GutenTAGParseError( + log_prefix_kind, + f"Anomaly kind '{anom_kind[PARAMETERS.KIND]}' is not supported!", + ) diff --git a/gutenTAG/consolidator.py b/gutenTAG/consolidator.py index ab58e12..5010014 100644 --- a/gutenTAG/consolidator.py +++ b/gutenTAG/consolidator.py @@ -8,12 +8,14 @@ class Consolidator: - def __init__(self, - base_oscillations: List[BaseOscillationInterface], - anomalies: List[Anomaly], - random_seed: Optional[int] = None, - semi_supervised: Optional[bool] = None, - supervised: Optional[bool] = None): + def __init__( + self, + base_oscillations: List[BaseOscillationInterface], + anomalies: List[Anomaly], + random_seed: Optional[int] = None, + semi_supervised: Optional[bool] = None, + supervised: Optional[bool] = None, + ): self.consolidated_channels: List[BaseOscillationInterface] = base_oscillations self.anomalies: List[Anomaly] = anomalies self.generated_anomalies: List[Tuple[AnomalyProtocol, int]] = [] @@ -21,7 +23,7 @@ def __init__(self, self.labels: Optional[np.ndarray] = None self.random_seed: Optional[int] = random_seed self.semi_supervised: Optional[bool] = semi_supervised - self.supervised: Optional[bool] = supervised + self.supervised: Optional[bool] = supervised def add_channel(self, channel: BaseOscillationInterface): self.consolidated_channels.append(channel) @@ -32,10 +34,11 @@ def get_channel(self, channel: int) -> BaseOscillationInterface: def generate(self, ctx: GenerationContext) -> Tuple[np.ndarray, np.ndarray]: channels: List[np.ndarray] = [] for c, bo in enumerate(self.consolidated_channels): - bo.generate_timeseries_and_variations(ctx.to_bo(c, channels), - semi_supervised=self.semi_supervised, - supervised=self.supervised, - ) # type: ignore # timeseries gets set in generate_timeseries_and_variations() + bo.generate_timeseries_and_variations( + ctx.to_bo(c, channels), + semi_supervised=self.semi_supervised, + supervised=self.supervised, + ) # type: ignore # timeseries gets set in generate_timeseries_and_variations() if bo.timeseries is not None: channels.append(bo.timeseries) self.timeseries = self._stack_channels(channels) @@ -52,10 +55,10 @@ def apply_variations(self): def apply_anomalies(self): label_ranges: List[LabelRange] = [] - for (protocol, channel) in self.generated_anomalies: + for protocol, channel in self.generated_anomalies: if len(protocol.subsequences) > 0: subsequence = np.vstack(protocol.subsequences).sum(axis=0) - self.timeseries[protocol.start:protocol.end, channel] = subsequence + self.timeseries[protocol.start : protocol.end, channel] = subsequence label_ranges.append(protocol.labels) 
self._add_label_ranges_to_labels(label_ranges) @@ -64,17 +67,25 @@ def generate_anomalies(self, ctx: GenerationContext): positions: List[Tuple[int, int]] = [] for anomaly in self.anomalies: current_base_oscillation = self.consolidated_channels[anomaly.channel] - anomaly_protocol = anomaly.generate(ctx.to_anomaly(current_base_oscillation, positions)) + anomaly_protocol = anomaly.generate( + ctx.to_anomaly(current_base_oscillation, positions) + ) positions.append((anomaly_protocol.start, anomaly_protocol.end)) self.generated_anomalies.append((anomaly_protocol, anomaly.channel)) def _stack_channels(self, channels: List[np.ndarray]) -> np.ndarray: - assert all([len(x.shape) == 1 for x in channels]), "The resulting channels have the wrong shape. Correct shape: `(l, d)`." + assert all( + [len(x.shape) == 1 for x in channels] + ), "The resulting channels have the wrong shape. Correct shape: `(l, d)`." return np.vstack(channels).transpose() def _add_label_ranges_to_labels(self, label_ranges: List[LabelRange]): if self.labels is not None: for label_range in label_ranges: - self.labels[label_range.start:label_range.start + label_range.length] = 1 + self.labels[ + label_range.start : label_range.start + label_range.length + ] = 1 else: - raise AssertionError("You cannot run this method before initializing the `labels` field!") + raise AssertionError( + "You cannot run this method before initializing the `labels` field!" + ) diff --git a/gutenTAG/generator/overview.py b/gutenTAG/generator/overview.py index 2e454cb..59923d3 100644 --- a/gutenTAG/generator/overview.py +++ b/gutenTAG/generator/overview.py @@ -19,16 +19,29 @@ def _decode_numpy_types(self, obj: Any) -> Any: Taken from [numpyencoder](https://github.com/hmallen/numpyencoder/blob/f8199a61ccde25f829444a9df4b21bcb2d1de8f2/numpyencoder/numpyencoder.py) """ - if isinstance(obj, (np.int_, np.intc, np.intp, np.int8, - np.int16, np.int32, np.int64, np.uint8, - np.uint16, np.uint32, np.uint64)): + if isinstance( + obj, + ( + np.int_, + np.intc, + np.intp, + np.int8, + np.int16, + np.int32, + np.int64, + np.uint8, + np.uint16, + np.uint32, + np.uint64, + ), + ): return int(obj) elif isinstance(obj, (np.float_, np.float16, np.float32, np.float64)): return float(obj) elif isinstance(obj, (np.complex_, np.complex64, np.complex128)): - return {'real': obj.real, 'imag': obj.imag} + return {"real": obj.real, "imag": obj.imag} elif isinstance(obj, (np.ndarray,)): return obj.tolist() @@ -58,7 +71,9 @@ def __init__(self): self.seed: Optional[int] = None try: - self.git_commit_sha = git.Repo(search_parent_directories=True).head.object.hexsha + self.git_commit_sha = git.Repo( + search_parent_directories=True + ).head.object.hexsha except git.InvalidGitRepositoryError: self.git_commit_sha = None @@ -81,7 +96,9 @@ def save_to_output_dir(self, path: os.PathLike) -> None: overview: Dict[str, Any] = dict() overview["generated-timeseries"] = [] for i, dataset in enumerate(self.datasets): - dataset["generation-id"] = dataset.get("base_oscillation", {}).get("title", i) + dataset["generation-id"] = dataset.get("base_oscillation", {}).get( + "title", i + ) overview["generated-timeseries"].append(dataset) overview["meta"] = {} diff --git a/gutenTAG/generator/timeseries.py b/gutenTAG/generator/timeseries.py index c536d76..90c5002 100644 --- a/gutenTAG/generator/timeseries.py +++ b/gutenTAG/generator/timeseries.py @@ -12,13 +12,24 @@ from ..anomalies import Anomaly from ..base_oscillations import BaseOscillationInterface from ..consolidator import Consolidator -from 
..timeseries import TrainingType, INDEX_COLUMN_NAME, LABEL_COLUMN_NAME, TimeSeries as ExtTimeSeries +from ..timeseries import ( + TrainingType, + INDEX_COLUMN_NAME, + LABEL_COLUMN_NAME, + TimeSeries as ExtTimeSeries, +) from ..utils.types import GenerationContext class TimeSeries: - def __init__(self, base_oscillations: List[BaseOscillationInterface], anomalies: List[Anomaly], - dataset_name: str, semi_supervised: bool = False, supervised: bool = False): + def __init__( + self, + base_oscillations: List[BaseOscillationInterface], + anomalies: List[Anomaly], + dataset_name: str, + semi_supervised: bool = False, + supervised: bool = False, + ): self.dataset_name = dataset_name self.base_oscillations = base_oscillations self.anomalies = anomalies @@ -34,31 +45,45 @@ def __init__(self, base_oscillations: List[BaseOscillationInterface], anomalies: def generate(self, random_seed: Optional[int] = None) -> TimeSeries: consolidator = Consolidator(self.base_oscillations, self.anomalies) - self.timeseries, self.labels = consolidator.generate(GenerationContext(seed=self._create_new_seed(random_seed))) + self.timeseries, self.labels = consolidator.generate( + GenerationContext(seed=self._create_new_seed(random_seed)) + ) if self.semi_supervised: - semi_supervised_consolidator = Consolidator(self.base_oscillations, [], - semi_supervised=self.semi_supervised) - self.semi_supervised_timeseries, self.semi_train_labels = semi_supervised_consolidator.generate( + semi_supervised_consolidator = Consolidator( + self.base_oscillations, [], semi_supervised=self.semi_supervised + ) + ( + self.semi_supervised_timeseries, + self.semi_train_labels, + ) = semi_supervised_consolidator.generate( GenerationContext(seed=self._create_new_seed(random_seed)) ) if self.supervised: - supervised_consolidator = Consolidator(self.base_oscillations, self.anomalies, - supervised=self.supervised) - self.supervised_timeseries, self.train_labels = supervised_consolidator.generate( + supervised_consolidator = Consolidator( + self.base_oscillations, self.anomalies, supervised=self.supervised + ) + ( + self.supervised_timeseries, + self.train_labels, + ) = supervised_consolidator.generate( GenerationContext(seed=self._create_new_seed(random_seed)) ) return self - def generate_with_dataframe(self, random_seed: Optional[int] = None) -> pd.DataFrame: + def generate_with_dataframe( + self, random_seed: Optional[int] = None + ) -> pd.DataFrame: self.generate(random_seed) return self.to_dataframe() def plot(self) -> None: n_series = 1 + np.sum([self.semi_supervised, self.supervised]) - fig, axs = plt.subplots(2, n_series, sharex="col", sharey="row", figsize=(6*n_series, 5)) + fig, axs = plt.subplots( + 2, n_series, sharex="col", sharey="row", figsize=(6 * n_series, 5) + ) # fix indexing, because subplots only returns a 1-dim array in this case: if n_series == 1: axs = np.array([axs]).T @@ -66,21 +91,37 @@ def plot(self) -> None: fig.suptitle(self.dataset_name) names: List[str] = ["test"] - assert self.timeseries is not None, "TimeSeries is not generated. Please, do so before plotting!" - assert self.labels is not None, "TimeSeries is not generated. Please, do so before plotting!" + assert ( + self.timeseries is not None + ), "TimeSeries is not generated. Please, do so before plotting!" + assert ( + self.labels is not None + ), "TimeSeries is not generated. Please, do so before plotting!" 
series: List[np.ndarray] = [self.timeseries] labels: List[np.ndarray] = [self.labels] - if self.supervised and self.supervised_timeseries is not None and self.train_labels is not None: + if ( + self.supervised + and self.supervised_timeseries is not None + and self.train_labels is not None + ): names.append("train_supervised") series.append(self.supervised_timeseries) labels.append(self.train_labels) - if self.semi_supervised and self.semi_supervised_timeseries is not None and self.semi_train_labels is not None: + if ( + self.semi_supervised + and self.semi_supervised_timeseries is not None + and self.semi_train_labels is not None + ): names.append("train_semi-supervised") series.append(self.semi_supervised_timeseries) labels.append(self.semi_train_labels) for i, (name, ts, label) in enumerate(zip(names, series, labels)): axs[0, i].set_title(name) - name_list = list(map(lambda j: f"channel-{j}", range(ts.shape[1]))) if ts.shape[1] > 1 else "time series" + name_list = ( + list(map(lambda j: f"channel-{j}", range(ts.shape[1]))) + if ts.shape[1] > 1 + else "time series" + ) axs[0, i].plot(ts, label=name_list) axs[1, i].plot(label, color="orange", label="ground truth") axs[0, 0].legend() @@ -88,10 +129,16 @@ def plot(self) -> None: plt.show() def build_figure_base_oscillation(self) -> plt.Figure: - assert self.timeseries is not None, "TimeSeries is not generated. Please, do so before building a figure!" + assert ( + self.timeseries is not None + ), "TimeSeries is not generated. Please, do so before building a figure!" channels = self.timeseries.shape[1] - name = list(map(lambda j: f"channel-{j}", range(channels))) if channels > 1 else "time series" + name = ( + list(map(lambda j: f"channel-{j}", range(channels))) + if channels > 1 + else "time series" + ) fig, ax = plt.subplots() ax.plot(self.timeseries, label=name) @@ -106,14 +153,18 @@ def to_datasets(self) -> List[ExtTimeSeries]: training_types.append(TrainingType.TRAIN_ANOMALIES) for training_type in training_types: - results.append(ExtTimeSeries( - name=self.dataset_name, - training_type=training_type, - timeseries=self.to_dataframe(training_type) - )) + results.append( + ExtTimeSeries( + name=self.dataset_name, + training_type=training_type, + timeseries=self.to_dataframe(training_type), + ) + ) return results - def to_dataframe(self, training_type: TrainingType = TrainingType.TEST) -> pd.DataFrame: + def to_dataframe( + self, training_type: TrainingType = TrainingType.TEST + ) -> pd.DataFrame: if training_type == TrainingType.TEST: ts, labels = self.timeseries, self.labels elif training_type == TrainingType.TRAIN_NO_ANOMALIES: @@ -121,7 +172,9 @@ def to_dataframe(self, training_type: TrainingType = TrainingType.TEST) -> pd.Da else: # if training_type == TrainingType.TRAIN_ANOMALIES: ts, labels = self.supervised_timeseries, self.train_labels - assert ts is not None, f"The timeseries for {training_type.value} must be generated before creating a DataFrame" + assert ( + ts is not None + ), f"The timeseries for {training_type.value} must be generated before creating a DataFrame" if labels is None: labels = np.zeros(ts.shape[0]) channel_names = list(map(lambda i: f"value-{i}", range(ts.shape[1]))) @@ -130,7 +183,9 @@ def to_dataframe(self, training_type: TrainingType = TrainingType.TEST) -> pd.Da df[LABEL_COLUMN_NAME] = labels return df - def to_csv(self, output_dir: Path, training_type: TrainingType = TrainingType.TEST) -> None: + def to_csv( + self, output_dir: Path, training_type: TrainingType = TrainingType.TEST + ) -> None: df = 
self.to_dataframe(training_type) df.to_csv(output_dir, sep=",", index=True) @@ -139,7 +194,11 @@ def _create_new_seed(self, base_seed: Optional[int]) -> SeedSequence: base_seed1: Union[int, SeedSequence] = SeedSequence() else: base_seed1 = base_seed - seeds = [int.from_bytes(md5(self.dataset_name.encode("utf-8")).digest(), byteorder="big")] + seeds = [ + int.from_bytes( + md5(self.dataset_name.encode("utf-8")).digest(), byteorder="big" + ) + ] if self._rng_counter > 0: seeds.append(self._rng_counter) self._rng_counter += 1 diff --git a/gutenTAG/gutenTAG.py b/gutenTAG/gutenTAG.py index 8ac8b81..a4c1a98 100644 --- a/gutenTAG/gutenTAG.py +++ b/gutenTAG/gutenTAG.py @@ -15,7 +15,11 @@ from .config import ConfigParser, ConfigValidator from .generator import Overview, TimeSeries from .timeseries import TrainingType, TimeSeries as ExtTimeSeries -from .utils.global_variables import UNSUPERVISED_FILENAME, SUPERVISED_FILENAME, SEMI_SUPERVISED_FILENAME +from .utils.global_variables import ( + UNSUPERVISED_FILENAME, + SUPERVISED_FILENAME, + SEMI_SUPERVISED_FILENAME, +) from .utils.tqdm_joblib import tqdm_joblib @@ -27,7 +31,9 @@ class _GenerationContext: seed: Optional[int] = None addons: Sequence[BaseAddOn] = () - def to_addon_process_ctx(self, timeseries: TimeSeries, config: Dict) -> AddOnProcessContext: + def to_addon_process_ctx( + self, timeseries: TimeSeries, config: Dict + ) -> AddOnProcessContext: return AddOnProcessContext( timeseries=timeseries, config=config, @@ -37,10 +43,9 @@ def to_addon_process_ctx(self, timeseries: TimeSeries, config: Dict) -> AddOnPro class GutenTAG: - def __init__(self, - n_jobs: int = 1, - seed: Optional[int] = None, - addons: Sequence[str] = ()): + def __init__( + self, n_jobs: int = 1, seed: Optional[int] = None, addons: Sequence[str] = () + ): self._overview = Overview() self._timeseries: List[TimeSeries] = [] self._n_jobs = n_jobs @@ -52,12 +57,16 @@ def __init__(self, self.seed = seed self.addons: Dict[str, BaseAddOn] = {} - def load_config_json(self, json_config_path: os.PathLike, only: Optional[str] = None) -> GutenTAG: + def load_config_json( + self, json_config_path: os.PathLike, only: Optional[str] = None + ) -> GutenTAG: with open(json_config_path, "r") as f: config = json.load(f) return self.load_config_dict(config, only) - def load_config_yaml(self, yaml_config_path: os.PathLike, only: Optional[str] = None) -> GutenTAG: + def load_config_yaml( + self, yaml_config_path: os.PathLike, only: Optional[str] = None + ) -> GutenTAG: with open(yaml_config_path, "r") as f: config = yaml.load(f, Loader=yaml.FullLoader) return self.load_config_dict(config, only) @@ -67,7 +76,9 @@ def load_config_dict(self, config: Dict, only: Optional[str] = None) -> GutenTAG # the validator's ones. 
config_parser = ConfigParser(only=only) timeseries = [] - for base_oscillations, anomalies, options, ts_config in config_parser.parse(config): + for base_oscillations, anomalies, options, ts_config in config_parser.parse( + config + ): ts = TimeSeries(base_oscillations, anomalies, **options.to_dict()) timeseries.append(ts) ConfigValidator().validate(config) @@ -78,13 +89,19 @@ def load_config_dict(self, config: Dict, only: Optional[str] = None) -> GutenTAG def remove_by_name(self, name: Union[str, Callable[[str], bool]]) -> GutenTAG: if isinstance(name, str): - self._timeseries = [ts for ts in self._timeseries if ts.dataset_name != name] + self._timeseries = [ + ts for ts in self._timeseries if ts.dataset_name != name + ] else: - self._timeseries = [ts for ts in self._timeseries if not name(ts.dataset_name)] + self._timeseries = [ + ts for ts in self._timeseries if not name(ts.dataset_name) + ] self._overview.remove_dataset_by_name(name) return self - def use_addon(self, addon: str, insert_location: Union[str, int] = "last") -> GutenTAG: + def use_addon( + self, addon: str, insert_location: Union[str, int] = "last" + ) -> GutenTAG: if addon not in self._registered_addons: if insert_location == "last": self._registered_addons.append(addon) @@ -95,18 +112,23 @@ def use_addon(self, addon: str, insert_location: Union[str, int] = "last") -> Gu else: ValueError(f"insert_position={insert_location} unknown!") else: - ValueError(f"'{addon}' already loaded at position {self._registered_addons.index(addon)}") + ValueError( + f"'{addon}' already loaded at position {self._registered_addons.index(addon)}" + ) return self - def generate(self, - return_timeseries: bool = False, - output_folder: Optional[os.PathLike] = None, - plot: bool = False) -> Optional[List[ExtTimeSeries]]: + def generate( + self, + return_timeseries: bool = False, + output_folder: Optional[os.PathLike] = None, + plot: bool = False, + ) -> Optional[List[ExtTimeSeries]]: n_jobs = self._n_jobs if n_jobs != 1 and plot: warnings.warn( f"Cannot generate time series in parallel while plotting ('n_jobs' was set to {n_jobs})! Falling " - f"back to serial generation.") + f"back to serial generation." 
+ ) n_jobs = 1 # prepare @@ -116,7 +138,12 @@ def generate(self, folder.mkdir(exist_ok=True) addon_types = import_addons(list(self._registered_addons)) - addons = [addon() for addon in tqdm(addon_types, desc="Initializing addons", total=len(addon_types))] + addons = [ + addon() + for addon in tqdm( + addon_types, desc="Initializing addons", total=len(addon_types) + ) + ] for name, addon in zip(self._registered_addons, addons): self.addons[name] = addon @@ -129,7 +156,9 @@ def generate(self, return_timeseries=return_timeseries, ) with tqdm_joblib(tqdm(desc="Generating datasets", total=len(self._timeseries))): - results: List[Tuple[Dict, Dict[str, Any], Optional[List[ExtTimeSeries]]]] = Parallel(n_jobs=n_jobs)( + results: List[ + Tuple[Dict, Dict[str, Any], Optional[List[ExtTimeSeries]]] + ] = Parallel(n_jobs=n_jobs)( delayed(self.internal_generate)(ctx, ts, config) for ts, config in zip(self._timeseries, self._overview.datasets) ) @@ -140,9 +169,7 @@ def generate(self, if folder is not None: self._overview.save_to_output_dir(folder) finalize_ctx = AddOnFinalizeContext( - overview=self._overview, - plot=plot, - output_folder=output_folder + overview=self._overview, plot=plot, output_folder=output_folder ) finalize_ctx.fill_store(data_dicts) for addon in tqdm(addons, desc="Finalizing addons", total=len(addons)): @@ -153,7 +180,9 @@ def generate(self, return None @staticmethod - def internal_generate(ctx: _GenerationContext, ts: TimeSeries, config: Dict) -> Tuple[Dict, Dict[str, Any], Optional[List[ExtTimeSeries]]]: + def internal_generate( + ctx: _GenerationContext, ts: TimeSeries, config: Dict + ) -> Tuple[Dict, Dict[str, Any], Optional[List[ExtTimeSeries]]]: ts.generate(ctx.seed) addon_ctx = ctx.to_addon_process_ctx(ts, config) for addon in ctx.addons: @@ -181,34 +210,45 @@ def save_timeseries(ts: TimeSeries, output_dir: os.PathLike) -> None: ts.to_csv(dataset_folder / UNSUPERVISED_FILENAME, TrainingType.TEST) if ts.supervised: - ts.to_csv(dataset_folder / SUPERVISED_FILENAME, TrainingType.TRAIN_ANOMALIES) + ts.to_csv( + dataset_folder / SUPERVISED_FILENAME, TrainingType.TRAIN_ANOMALIES + ) if ts.semi_supervised: - ts.to_csv(dataset_folder / SEMI_SUPERVISED_FILENAME, TrainingType.TRAIN_NO_ANOMALIES) + ts.to_csv( + dataset_folder / SEMI_SUPERVISED_FILENAME, + TrainingType.TRAIN_NO_ANOMALIES, + ) @staticmethod - def from_json(path: os.PathLike, - n_jobs: int = 1, - seed: Optional[int] = None, - addons: Sequence[str] = (), - only: Optional[str] = None) -> GutenTAG: + def from_json( + path: os.PathLike, + n_jobs: int = 1, + seed: Optional[int] = None, + addons: Sequence[str] = (), + only: Optional[str] = None, + ) -> GutenTAG: gt = GutenTAG(n_jobs=n_jobs, seed=seed, addons=addons) return gt.load_config_json(path, only=only) @staticmethod - def from_yaml(path: os.PathLike, - n_jobs: int = 1, - seed: Optional[int] = None, - addons: Sequence[str] = (), - only: Optional[str] = None) -> GutenTAG: + def from_yaml( + path: os.PathLike, + n_jobs: int = 1, + seed: Optional[int] = None, + addons: Sequence[str] = (), + only: Optional[str] = None, + ) -> GutenTAG: gt = GutenTAG(n_jobs=n_jobs, seed=seed, addons=addons) return gt.load_config_yaml(path, only=only) @staticmethod - def from_dict(config: Dict, - n_jobs: int = 1, - seed: Optional[int] = None, - addons: Sequence[str] = (), - only: Optional[str] = None) -> GutenTAG: + def from_dict( + config: Dict, + n_jobs: int = 1, + seed: Optional[int] = None, + addons: Sequence[str] = (), + only: Optional[str] = None, + ) -> GutenTAG: gt = 
GutenTAG(n_jobs=n_jobs, seed=seed, addons=addons) return gt.load_config_dict(config, only=only) diff --git a/gutenTAG/utils/compatibility.py b/gutenTAG/utils/compatibility.py index 5784698..5748300 100644 --- a/gutenTAG/utils/compatibility.py +++ b/gutenTAG/utils/compatibility.py @@ -19,16 +19,34 @@ class Compatibility: [1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1], # variance [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0], # mode_correlation ], - columns=[BASE_OSCILLATION_NAMES.SINE, BASE_OSCILLATION_NAMES.COSINE, BASE_OSCILLATION_NAMES.SQUARE, - BASE_OSCILLATION_NAMES.RANDOM_WALK, BASE_OSCILLATION_NAMES.CYLINDER_BELL_FUNNEL, - BASE_OSCILLATION_NAMES.ECG, BASE_OSCILLATION_NAMES.POLYNOMIAL, BASE_OSCILLATION_NAMES.RANDOM_MODE_JUMP, - BASE_OSCILLATION_NAMES.FORMULA, BASE_OSCILLATION_NAMES.SAWTOOTH, BASE_OSCILLATION_NAMES.DIRICHLET, - BASE_OSCILLATION_NAMES.MLS, BASE_OSCILLATION_NAMES.CUSTOM_INPUT], - index=[ANOMALY_TYPE_NAMES.AMPLITUDE, ANOMALY_TYPE_NAMES.EXTREMUM, ANOMALY_TYPE_NAMES.FREQUENCY, - ANOMALY_TYPE_NAMES.MEAN, ANOMALY_TYPE_NAMES.PATTERN, ANOMALY_TYPE_NAMES.PATTERN_SHIFT, - ANOMALY_TYPE_NAMES.PLATFORM, ANOMALY_TYPE_NAMES.TREND, ANOMALY_TYPE_NAMES.VARIANCE, - ANOMALY_TYPE_NAMES.MODE_CORRELATION], - dtype=bool + columns=[ + BASE_OSCILLATION_NAMES.SINE, + BASE_OSCILLATION_NAMES.COSINE, + BASE_OSCILLATION_NAMES.SQUARE, + BASE_OSCILLATION_NAMES.RANDOM_WALK, + BASE_OSCILLATION_NAMES.CYLINDER_BELL_FUNNEL, + BASE_OSCILLATION_NAMES.ECG, + BASE_OSCILLATION_NAMES.POLYNOMIAL, + BASE_OSCILLATION_NAMES.RANDOM_MODE_JUMP, + BASE_OSCILLATION_NAMES.FORMULA, + BASE_OSCILLATION_NAMES.SAWTOOTH, + BASE_OSCILLATION_NAMES.DIRICHLET, + BASE_OSCILLATION_NAMES.MLS, + BASE_OSCILLATION_NAMES.CUSTOM_INPUT, + ], + index=[ + ANOMALY_TYPE_NAMES.AMPLITUDE, + ANOMALY_TYPE_NAMES.EXTREMUM, + ANOMALY_TYPE_NAMES.FREQUENCY, + ANOMALY_TYPE_NAMES.MEAN, + ANOMALY_TYPE_NAMES.PATTERN, + ANOMALY_TYPE_NAMES.PATTERN_SHIFT, + ANOMALY_TYPE_NAMES.PLATFORM, + ANOMALY_TYPE_NAMES.TREND, + ANOMALY_TYPE_NAMES.VARIANCE, + ANOMALY_TYPE_NAMES.MODE_CORRELATION, + ], + dtype=bool, ) @staticmethod @@ -38,6 +56,6 @@ def check(anomaly: str, base_oscillation: str) -> bool: except KeyError: warnings.warn( message=f"No compatibility information for BO {base_oscillation} and anomaly {anomaly} found!", - category=UserWarning + category=UserWarning, ) return False diff --git a/gutenTAG/utils/default_values.py b/gutenTAG/utils/default_values.py index 0b47fb2..33c1af7 100644 --- a/gutenTAG/utils/default_values.py +++ b/gutenTAG/utils/default_values.py @@ -30,12 +30,12 @@ PARAMETERS.INPUT_TIMESERIES_PATH_TRAIN: None, PARAMETERS.INPUT_TIMESERIES_PATH_TEST: None, PARAMETERS.USE_COLUMN_TRAIN: None, - PARAMETERS.USE_COLUMN_TEST: None + PARAMETERS.USE_COLUMN_TEST: None, }, ANOMALIES: { PARAMETERS.LENGTH: 200, PARAMETERS.POSITION: "middle", PARAMETERS.CHANNEL: 0, - PARAMETERS.CREEPING_LENGTH: 0 - } + PARAMETERS.CREEPING_LENGTH: 0, + }, } diff --git a/gutenTAG/utils/global_variables.py b/gutenTAG/utils/global_variables.py index 3cec123..3c20fe8 100644 --- a/gutenTAG/utils/global_variables.py +++ b/gutenTAG/utils/global_variables.py @@ -87,7 +87,7 @@ class CONFIG_SCHEMA: "anomaly.guten-tag-generation-config.schema.yaml", "anomaly-kind.guten-tag-generation-config.schema.yaml", "formula.guten-tag-generation-config.schema.yaml", - "oscillation.guten-tag-generation-config.schema.yaml" + "oscillation.guten-tag-generation-config.schema.yaml", ] @staticmethod diff --git a/gutenTAG/utils/logger.py b/gutenTAG/utils/logger.py index 01b8038..b4f7280 100644 --- 
a/gutenTAG/utils/logger.py +++ b/gutenTAG/utils/logger.py @@ -8,7 +8,9 @@ def __init__(self): self.logger.addHandler(logging.StreamHandler()) def warn_false_combination(self, anomaly: str, base_oscillation: str): - self.logger.warning(f"You tried to generate '{anomaly}' on '{base_oscillation}'. That doesn't work! Guten Tag!") + self.logger.warning( + f"You tried to generate '{anomaly}' on '{base_oscillation}'. That doesn't work! Guten Tag!" + ) if __name__ == "__main__": diff --git a/gutenTAG/utils/types.py b/gutenTAG/utils/types.py index d81af74..f1e8b63 100644 --- a/gutenTAG/utils/types.py +++ b/gutenTAG/utils/types.py @@ -12,26 +12,33 @@ def __init__(self, seed: SeedSequence): self.seed: SeedSequence = seed self.rng: np.random.Generator = np.random.default_rng(self.seed) - def to_bo(self, channel: int = 0, previous_channels: Sequence[np.ndarray] = ()) -> BOGenerationContext: + def to_bo( + self, channel: int = 0, previous_channels: Sequence[np.ndarray] = () + ) -> BOGenerationContext: return BOGenerationContext( seed=self.seed, rng=self.rng, channel=channel, - previous_channels=list(previous_channels) + previous_channels=list(previous_channels), ) - def to_anomaly(self, bo: 'BaseOscillationInterface', # type: ignore # to prevent circular import - previous_anomaly_positions: Sequence[Tuple[int, int]]) -> AnomalyGenerationContext: + def to_anomaly( + self, + bo: "BaseOscillationInterface", # type: ignore # noqa: F821 # to prevent circular import + previous_anomaly_positions: Sequence[Tuple[int, int]], + ) -> AnomalyGenerationContext: return AnomalyGenerationContext( seed=self.seed, rng=self.rng, base_oscillation=bo, - previous_anomaly_positions=list(previous_anomaly_positions) + previous_anomaly_positions=list(previous_anomaly_positions), ) @staticmethod - def re_seed(new_seeds: Union[int, Optional[int], Sequence[int]], - base_seed: Union[int, SeedSequence] = 0) -> SeedSequence: + def re_seed( + new_seeds: Union[int, Optional[int], Sequence[int]], + base_seed: Union[int, SeedSequence] = 0, + ) -> SeedSequence: if isinstance(base_seed, SeedSequence): if isinstance(base_seed.entropy, int): initial_entropy: Sequence[int] = [base_seed.entropy] @@ -45,7 +52,7 @@ def re_seed(new_seeds: Union[int, Optional[int], Sequence[int]], new_seeds = [] elif isinstance(new_seeds, int): new_seeds = [new_seeds] - return SeedSequence(initial_entropy + new_seeds) # type: ignore # wrong type declaration in SeedSequence + return SeedSequence(initial_entropy + new_seeds) # type: ignore # wrong type declaration in SeedSequence @dataclass @@ -62,7 +69,7 @@ def to_trend(self) -> BOGenerationContext: rng=self.rng, channel=self.channel, previous_channels=self.previous_channels, - is_trend=True + is_trend=True, ) @staticmethod @@ -74,7 +81,7 @@ def default() -> BOGenerationContext: class AnomalyGenerationContext(GenerationContext): seed: SeedSequence rng: np.random.Generator - base_oscillation: 'BaseOscillationInterface' # type: ignore # to prevent circular import + base_oscillation: "BaseOscillationInterface" # type: ignore # noqa: F821 # to prevent circular import previous_anomaly_positions: List[Tuple[int, int]] @property diff --git a/requirements.dev b/requirements.dev index 23ecce1..8808c9e 100644 --- a/requirements.dev +++ b/requirements.dev @@ -5,6 +5,9 @@ pytest pytest-cov mypy==0.920 freezegun +flake8==5.0.4 +black==23.1 +pre-commit==2.21 types-PyYAML types-jsonschema diff --git a/setup.cfg b/setup.cfg index ef7b7c6..b040602 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,9 +1,11 @@ [metadata] +description-file = 
README.md license_files = LICENSE [coverage:run] branch = False +parallel = True omit = **/__main__.py @@ -14,8 +16,9 @@ exclude_lines = \.\.\. def __repr__ pass + if __name__ == .__main__.: fail_under = 80 -skip_covered = False +skip_covered = True sort = Cover [mypy] @@ -52,3 +55,22 @@ ignore_missing_imports = True [mypy-neurokit2.*] ignore_missing_imports = True + +[flake8] +exclude = + .git + __pycache__ + build + dist + doc +max-complexity = 10 +hang-closing = False +max-line-length = 88 +extend-select = + B950 +extend-ignore = + E203, + E501 +per-file-ignores = + __init__.py:F401 + gutenTAG/api/*:F401 diff --git a/setup.py b/setup.py index 2e661a7..4caeb2f 100644 --- a/setup.py +++ b/setup.py @@ -33,8 +33,14 @@ def run(self) -> None: import pytest from pytest import ExitCode - exit_code = pytest.main(["--cov-report=term", "--cov-report=xml:coverage.xml", - f"--cov={PYTHON_NAME}", "tests"]) + exit_code = pytest.main( + [ + "--cov-report=term", + "--cov-report=xml:coverage.xml", + f"--cov={PYTHON_NAME}", + "tests", + ] + ) if exit_code == ExitCode.TESTS_FAILED: raise ValueError("Tests failed!") elif exit_code == ExitCode.INTERRUPTED: @@ -76,12 +82,16 @@ def finalize_options(self): pass def run(self): - files = [ - ".coverage*", - "coverage.xml" + files = [".coverage*", "coverage.xml"] + dirs = [ + "build", + "dist", + "*.egg-info", + "**/__pycache__", + ".mypy_cache", + ".pytest_cache", + "**/.ipynb_checkpoints", ] - dirs = ["build", "dist", "*.egg-info", "**/__pycache__", ".mypy_cache", - ".pytest_cache", "**/.ipynb_checkpoints"] for d in dirs: for filename in glob.glob(d): shutil.rmtree(filename, ignore_errors=True) @@ -107,12 +117,18 @@ def run(self): license="MIT", classifiers=[ "License :: OSI Approved :: MIT License", + "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11" + "Programming Language :: Python :: 3.11", + "Typing :: Typed", + "Topic :: Software Development", + "Topic :: Scientific/Engineering", + "Intended Audience :: Science/Research", + "Intended Audience :: Developers", ], packages=find_packages(exclude=("tests", "tests.*")), package_data={"gutenTAG": ["py.typed", "config/schema/*"]}, @@ -122,13 +138,9 @@ def run(self): cmdclass={ "test": PyTestCommand, "typecheck": MyPyCheckCommand, - "clean": CleanCommand + "clean": CleanCommand, }, zip_safe=False, # provides="gutenTAG", - entry_points={ - "console_scripts": [ - "gutenTAG=gutenTAG.__main__:cli" - ] - } + entry_points={"console_scripts": ["gutenTAG=gutenTAG.__main__:cli"]}, ) diff --git a/tests/__init__.py b/tests/__init__.py index 24fae26..9b4a926 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,5 +1,5 @@ import unittest -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_addons.py b/tests/test_addons.py index fdbc0b6..2785161 100644 --- a/tests/test_addons.py +++ b/tests/test_addons.py @@ -4,7 +4,12 @@ import numpy as np from gutenTAG import GutenTAG -from gutenTAG.addons import AddOnProcessContext, AddOnFinalizeContext, BaseAddOn, import_addons +from gutenTAG.addons import ( + AddOnProcessContext, + AddOnFinalizeContext, + BaseAddOn, + import_addons, +) from gutenTAG.addons.timeeval import TimeEvalAddOn @@ -43,7 +48,9 @@ def test_import_wrong_inheritance(self): self.assertRegex(str(ex.exception), r"is not a compatible AddOn") def 
test_timeeval_addon_rmj(self): - gutentag = GutenTAG.from_yaml(Path("tests/configs/example-config-rmj.yaml"), seed=42) + gutentag = GutenTAG.from_yaml( + Path("tests/configs/example-config-rmj.yaml"), seed=42 + ) gutentag.generate() addon = TimeEvalAddOn() self._execute_addon(gutentag, addon) @@ -65,7 +72,9 @@ def test_timeeval_addon_rmj(self): self.assertEqual(df["period_size"][0], 20) def test_timeeval_addon_sine(self): - gutentag = GutenTAG.from_yaml(Path("tests/configs/example-config-ecg.yaml"), seed=42) + gutentag = GutenTAG.from_yaml( + Path("tests/configs/example-config-ecg.yaml"), seed=42 + ) gutentag.generate() addon = TimeEvalAddOn() self._execute_addon(gutentag, addon) diff --git a/tests/test_base_oscillations/test_formula.py b/tests/test_base_oscillations/test_formula.py index 5db8d96..4ddb294 100644 --- a/tests/test_base_oscillations/test_formula.py +++ b/tests/test_base_oscillations/test_formula.py @@ -5,31 +5,16 @@ from gutenTAG.base_oscillations.formula import FormulaParser # type: ignore + class TestFormula(unittest.TestCase): def test_formula_parser(self): - prev_channels = [ - np.arange(10), - np.ones(10) - ] + prev_channels = [np.arange(10), np.ones(10)] d = { - "base": { - "base": 0, - "operation": { - "kind": "+", - "operand": { - "base": 0 - } - } - }, + "base": {"base": 0, "operation": {"kind": "+", "operand": {"base": 0}}}, "operation": { "kind": "*", - "operand": { - "base": 1, - "aggregation": { - "kind": "sum" - } - } - } + "operand": {"base": 1, "aggregation": {"kind": "sum"}}, + }, } expected = (np.arange(10) + np.arange(10)) * np.ones(10).sum() parsed = FormulaParser(d).parse(prev_channels=prev_channels).execute() diff --git a/tests/test_generator/test_compatibility.py b/tests/test_generator/test_compatibility.py index 455670e..ab85fb6 100644 --- a/tests/test_generator/test_compatibility.py +++ b/tests/test_generator/test_compatibility.py @@ -10,11 +10,7 @@ def setUp(self) -> None: { "name": "ecg", "length": 100, - "base-oscillations": [ - { - "kind": "ecg" - } - ], + "base-oscillations": [{"kind": "ecg"}], "anomalies": [ { "position": "middle", @@ -24,11 +20,11 @@ def setUp(self) -> None: { "kind": "pattern-shift", "shift_by": 5, - "transition_window": 10 + "transition_window": 10, } - ] + ], } - ] + ], } ] } @@ -37,11 +33,7 @@ def setUp(self) -> None: { "name": "ecg", "length": 100, - "base-oscillations": [ - { - "kind": "ecg" - } - ], + "base-oscillations": [{"kind": "ecg"}], "anomalies": [ { "position": "middle", @@ -51,14 +43,14 @@ def setUp(self) -> None: { "kind": "pattern-shift", "shift_by": 5, - "transition_window": 10 + "transition_window": 10, }, { "kind": "mode-correlation", - } - ] + }, + ], } - ] + ], } ] } diff --git a/tests/test_generator/test_overview_sanitizer.py b/tests/test_generator/test_overview_sanitizer.py index d33a696..feb02a4 100644 --- a/tests/test_generator/test_overview_sanitizer.py +++ b/tests/test_generator/test_overview_sanitizer.py @@ -13,12 +13,7 @@ def setUp(self) -> None: { "name": "ecg", "length": np.int_(100), - "base-oscillations": [ - { - "kind": "ecg", - "frequency": np.float_(2) - } - ], + "base-oscillations": [{"kind": "ecg", "frequency": np.float_(2)}], "anomalies": [ { "position": "middle", @@ -29,11 +24,11 @@ def setUp(self) -> None: "kind": "pattern-shift", "shift_by": np.int_(5), "transition_window": np.int_(10), - "some_array": np.array([1.2, 2.3]) + "some_array": np.array([1.2, 2.3]), } - ] + ], } - ] + ], } ] } @@ -43,12 +38,7 @@ def setUp(self) -> None: { "name": "ecg", "length": 100, - "base-oscillations": 
[ - { - "kind": "ecg", - "frequency": 2.0 - } - ], + "base-oscillations": [{"kind": "ecg", "frequency": 2.0}], "anomalies": [ { "position": "middle", @@ -59,11 +49,11 @@ def setUp(self) -> None: "kind": "pattern-shift", "shift_by": 5, "transition_window": 10, - "some_array": [1.2, 2.3] + "some_array": [1.2, 2.3], } - ] + ], } - ] + ], } ] } diff --git a/tests/test_integration/__init__.py b/tests/test_integration/__init__.py index 480388e..0528525 100644 --- a/tests/test_integration/__init__.py +++ b/tests/test_integration/__init__.py @@ -16,9 +16,14 @@ def _load_config_and_generate(self, config_path: str) -> pd.DataFrame: assert df_generated is not None, "DataFrame should have been returned" return df_generated[0].timeseries - def _compare_expected_and_generated(self, config_path: str, expected_path: str, columns: List[str]): - expected_ts = pd.read_csv(expected_path, index_col="timestamp", - dtype={'value-0': np.float64, 'is_anomaly': np.int8}) + def _compare_expected_and_generated( + self, config_path: str, expected_path: str, columns: List[str] + ): + expected_ts = pd.read_csv( + expected_path, + index_col="timestamp", + dtype={"value-0": np.float64, "is_anomaly": np.int8}, + ) df_generated = self._load_config_and_generate(config_path) for column in columns: diff --git a/tests/test_integration/test_anomaly_types.py b/tests/test_integration/test_anomaly_types.py index f4eccc6..45fa142 100644 --- a/tests/test_integration/test_anomaly_types.py +++ b/tests/test_integration/test_anomaly_types.py @@ -3,41 +3,57 @@ class TestIntegrationAnomalyTypes(TestIntegration): def test_pattern_sinusoid_from_config(self): - self._compare_expected_and_generated("tests/configs/example-config-pattern.yaml", - "tests/generated/example-ts-pattern.csv", - ["value-0", "is_anomaly"]) + self._compare_expected_and_generated( + "tests/configs/example-config-pattern.yaml", + "tests/generated/example-ts-pattern.csv", + ["value-0", "is_anomaly"], + ) def test_extremum_from_config(self): - self._compare_expected_and_generated("tests/configs/example-config-extremum.yaml", - "tests/generated/example-ts-extremum.csv", - ["value-0", "is_anomaly"]) + self._compare_expected_and_generated( + "tests/configs/example-config-extremum.yaml", + "tests/generated/example-ts-extremum.csv", + ["value-0", "is_anomaly"], + ) def test_amplitude_from_config(self): - self._compare_expected_and_generated("tests/configs/example-config-amplitude.yaml", - "tests/generated/example-ts-amplitude.csv", - ["value-0", "is_anomaly"]) + self._compare_expected_and_generated( + "tests/configs/example-config-amplitude.yaml", + "tests/generated/example-ts-amplitude.csv", + ["value-0", "is_anomaly"], + ) def test_trend_anomaly_from_config(self): - self._compare_expected_and_generated("tests/configs/example-config-trend-anomaly.yaml", - "tests/generated/example-ts-trend-anomaly.csv", - ["value-0", "is_anomaly"]) + self._compare_expected_and_generated( + "tests/configs/example-config-trend-anomaly.yaml", + "tests/generated/example-ts-trend-anomaly.csv", + ["value-0", "is_anomaly"], + ) def test_creeping_amplitude_anomaly_from_config(self): - self._compare_expected_and_generated("tests/configs/example-config-creeping-amplitude.yaml", - "tests/generated/example-ts-creeping-amplitude.csv", - ["value-0", "is_anomaly"]) + self._compare_expected_and_generated( + "tests/configs/example-config-creeping-amplitude.yaml", + "tests/generated/example-ts-creeping-amplitude.csv", + ["value-0", "is_anomaly"], + ) def test_creeping_mean_anomaly_from_config(self): - 
self._compare_expected_and_generated("tests/configs/example-config-creeping-mean.yaml", - "tests/generated/example-ts-creeping-mean.csv", - ["value-0", "is_anomaly"]) + self._compare_expected_and_generated( + "tests/configs/example-config-creeping-mean.yaml", + "tests/generated/example-ts-creeping-mean.csv", + ["value-0", "is_anomaly"], + ) def test_creeping_variance_anomaly_from_config(self): - self._compare_expected_and_generated("tests/configs/example-config-creeping-variance.yaml", - "tests/generated/example-ts-creeping-variance.csv", - ["value-0", "is_anomaly"]) + self._compare_expected_and_generated( + "tests/configs/example-config-creeping-variance.yaml", + "tests/generated/example-ts-creeping-variance.csv", + ["value-0", "is_anomaly"], + ) def test_creeping_variance_transition_anomaly_from_config(self): - self._compare_expected_and_generated("tests/configs/example-config-creeping-variance-transition.yaml", - "tests/generated/example-ts-creeping-variance-transition.csv", - ["value-0", "is_anomaly"]) + self._compare_expected_and_generated( + "tests/configs/example-config-creeping-variance-transition.yaml", + "tests/generated/example-ts-creeping-variance-transition.csv", + ["value-0", "is_anomaly"], + ) diff --git a/tests/test_integration/test_base_oscillations.py b/tests/test_integration/test_base_oscillations.py index 4e472ea..92e2ae5 100644 --- a/tests/test_integration/test_base_oscillations.py +++ b/tests/test_integration/test_base_oscillations.py @@ -2,93 +2,128 @@ class TestIntegrationBaseOscillations(TestIntegration): - def test_generates_single_channel(self): - self._compare_expected_and_generated("tests/configs/example-config-single.yaml", - "tests/generated/example-ts-single.csv", - ["value-0", "is_anomaly"]) + self._compare_expected_and_generated( + "tests/configs/example-config-single.yaml", + "tests/generated/example-ts-single.csv", + ["value-0", "is_anomaly"], + ) def test_generates_single_channel_bo_list(self): - self._compare_expected_and_generated("tests/configs/example-config-single-element.yaml", - "tests/generated/example-ts-single.csv", - ["value-0", "is_anomaly"]) + self._compare_expected_and_generated( + "tests/configs/example-config-single-element.yaml", + "tests/generated/example-ts-single.csv", + ["value-0", "is_anomaly"], + ) def test_generates_multi_channel(self): - self._compare_expected_and_generated("tests/configs/example-config-multi.yaml", - "tests/generated/example-ts-multi.csv", - ["value-0", "value-1", "is_anomaly"]) + self._compare_expected_and_generated( + "tests/configs/example-config-multi.yaml", + "tests/generated/example-ts-multi.csv", + ["value-0", "value-1", "is_anomaly"], + ) def test_generates_multi_channel_bo_list(self): - self._compare_expected_and_generated("tests/configs/example-config-multi-element.yaml", - "tests/generated/example-ts-multi.csv", - ["value-0", "value-1", "is_anomaly"]) + self._compare_expected_and_generated( + "tests/configs/example-config-multi-element.yaml", + "tests/generated/example-ts-multi.csv", + ["value-0", "value-1", "is_anomaly"], + ) def test_rmj_bo_list(self): - self._compare_expected_and_generated("tests/configs/example-config-rmj.yaml", - "tests/generated/example-ts-rmj.csv", - ["value-0", "value-1", "is_anomaly"]) - + self._compare_expected_and_generated( + "tests/configs/example-config-rmj.yaml", + "tests/generated/example-ts-rmj.csv", + ["value-0", "value-1", "is_anomaly"], + ) + def test_custom_input_from_config(self): - 
self._compare_expected_and_generated("tests/configs/example-config-custom-input.yaml", - "tests/generated/example-ts-custom-input.csv", - ["value-0", "value-1","is_anomaly"]) + self._compare_expected_and_generated( + "tests/configs/example-config-custom-input.yaml", + "tests/generated/example-ts-custom-input.csv", + ["value-0", "value-1", "is_anomaly"], + ) def test_cbf_from_config(self): - self._compare_expected_and_generated("tests/configs/example-config-cbf.yaml", - "tests/generated/example-ts-cbf.csv", - ["value-0", "is_anomaly"]) + self._compare_expected_and_generated( + "tests/configs/example-config-cbf.yaml", + "tests/generated/example-ts-cbf.csv", + ["value-0", "is_anomaly"], + ) def test_rw_from_config(self): - self._compare_expected_and_generated("tests/configs/example-config-rw.yaml", - "tests/generated/example-ts-rw.csv", - ["value-0", "is_anomaly"]) + self._compare_expected_and_generated( + "tests/configs/example-config-rw.yaml", + "tests/generated/example-ts-rw.csv", + ["value-0", "is_anomaly"], + ) def test_ecg_from_config(self): - self._compare_expected_and_generated("tests/configs/example-config-ecg.yaml", - "tests/generated/example-ts-ecg.csv", - ["value-0", "is_anomaly"]) + self._compare_expected_and_generated( + "tests/configs/example-config-ecg.yaml", + "tests/generated/example-ts-ecg.csv", + ["value-0", "is_anomaly"], + ) def test_polynomial_from_config(self): - self._compare_expected_and_generated("tests/configs/example-config-polynomial.yaml", - "tests/generated/example-ts-polynomial.csv", - ["value-0", "is_anomaly"]) + self._compare_expected_and_generated( + "tests/configs/example-config-polynomial.yaml", + "tests/generated/example-ts-polynomial.csv", + ["value-0", "is_anomaly"], + ) def test_trend_from_config(self): - self._compare_expected_and_generated("tests/configs/example-config-trend.yaml", - "tests/generated/example-ts-trend.csv", - ["value-0", "is_anomaly"]) + self._compare_expected_and_generated( + "tests/configs/example-config-trend.yaml", + "tests/generated/example-ts-trend.csv", + ["value-0", "is_anomaly"], + ) def test_nested_trend_from_config(self): - self._compare_expected_and_generated("tests/configs/example-config-nested-trend.yaml", - "tests/generated/example-ts-nested-trend.csv", - ["value-0", "is_anomaly"]) + self._compare_expected_and_generated( + "tests/configs/example-config-nested-trend.yaml", + "tests/generated/example-ts-nested-trend.csv", + ["value-0", "is_anomaly"], + ) def test_formula_from_config(self): - self._compare_expected_and_generated("tests/configs/example-config-formula.yaml", - "tests/generated/example-ts-formula.csv", - ["value-0", "is_anomaly"]) + self._compare_expected_and_generated( + "tests/configs/example-config-formula.yaml", + "tests/generated/example-ts-formula.csv", + ["value-0", "is_anomaly"], + ) def test_cosine_from_config(self): - self._compare_expected_and_generated("tests/configs/example-config-cosine.yaml", - "tests/generated/example-ts-cosine.csv", - ["value-0", "is_anomaly"]) + self._compare_expected_and_generated( + "tests/configs/example-config-cosine.yaml", + "tests/generated/example-ts-cosine.csv", + ["value-0", "is_anomaly"], + ) def test_square_from_config(self): - self._compare_expected_and_generated("tests/configs/example-config-square.yaml", - "tests/generated/example-ts-square.csv", - ["value-0", "is_anomaly"]) + self._compare_expected_and_generated( + "tests/configs/example-config-square.yaml", + "tests/generated/example-ts-square.csv", + ["value-0", "is_anomaly"], + ) def 
test_sawtooth_from_config(self): - self._compare_expected_and_generated("tests/configs/example-config-sawtooth.yaml", - "tests/generated/example-ts-sawtooth.csv", - ["value-0", "is_anomaly"]) + self._compare_expected_and_generated( + "tests/configs/example-config-sawtooth.yaml", + "tests/generated/example-ts-sawtooth.csv", + ["value-0", "is_anomaly"], + ) def test_dirichlet_from_config(self): - self._compare_expected_and_generated("tests/configs/example-config-dirichlet.yaml", - "tests/generated/example-ts-dirichlet.csv", - ["value-0", "is_anomaly"]) + self._compare_expected_and_generated( + "tests/configs/example-config-dirichlet.yaml", + "tests/generated/example-ts-dirichlet.csv", + ["value-0", "is_anomaly"], + ) def test_mls_from_config(self): - self._compare_expected_and_generated("tests/configs/example-config-mls.yaml", - "tests/generated/example-ts-mls.csv", - ["value-0", "is_anomaly"]) + self._compare_expected_and_generated( + "tests/configs/example-config-mls.yaml", + "tests/generated/example-ts-mls.csv", + ["value-0", "is_anomaly"], + ) diff --git a/tests/test_integration/test_benchmark_generation.py b/tests/test_integration/test_benchmark_generation.py index 74e86c3..0294ebe 100644 --- a/tests/test_integration/test_benchmark_generation.py +++ b/tests/test_integration/test_benchmark_generation.py @@ -3,6 +3,6 @@ class TestIntegrationBenchmarkGeneration(TestIntegration): def test_generates_benchmark_files_without_errors(self): - df = self._load_config_and_generate("generation_configs/benchmark-datasets.yaml") + self._load_config_and_generate("generation_configs/benchmark-datasets.yaml") generated_without_errors = True self.assertTrue(generated_without_errors) diff --git a/tests/test_parallel.py b/tests/test_parallel.py index 5150c55..6be904e 100644 --- a/tests/test_parallel.py +++ b/tests/test_parallel.py @@ -10,10 +10,14 @@ class TestParallel(unittest.TestCase): def test_seeded_parallel(self): - expected_ts = pd.read_csv("tests/generated/example-ts-multi.csv", - index_col="timestamp", - dtype={'value-0': np.float64, 'is_anomaly': np.int8}) - gutenTAG = GutenTAG.from_yaml(Path("tests/configs/example-config-multi.yaml"), seed=42, n_jobs=-1) + expected_ts = pd.read_csv( + "tests/generated/example-ts-multi.csv", + index_col="timestamp", + dtype={"value-0": np.float64, "is_anomaly": np.int8}, + ) + gutenTAG = GutenTAG.from_yaml( + Path("tests/configs/example-config-multi.yaml"), seed=42, n_jobs=-1 + ) df_generated = gutenTAG.generate(return_timeseries=True) assert df_generated is not None, "DataFrame should have been returned" df = df_generated[0].timeseries diff --git a/tests/test_seeding.py b/tests/test_seeding.py index 20ae53f..6811750 100644 --- a/tests/test_seeding.py +++ b/tests/test_seeding.py @@ -15,29 +15,37 @@ def setUp(self) -> None: "name": "sine-ts", "length": 500, "base-oscillations": [{"kind": "sine", "frequency": 1.5}], - "anomalies": [{"position": "beginning", "length": 5, "kinds": [{"kind": "frequency", "frequency_factor": 2}]}] + "anomalies": [ + { + "position": "beginning", + "length": 5, + "kinds": [{"kind": "frequency", "frequency_factor": 2}], + } + ], } config_ecg = { "name": "ecg-ts", "length": 1000, "base-oscillations": [{"kind": "ecg", "frequency": 10}], - "anomalies": [{"position": "middle", "length": 100, "kinds": [{"kind": "platform", "value": -1}]}] - } - self.config_single_sine = { - "timeseries": [config_sine] - } - self.config_single_ecg = { - "timeseries": [config_ecg] - } - self.config_multiple = { - "timeseries": [config_sine, config_ecg] + 
"anomalies": [ + { + "position": "middle", + "length": 100, + "kinds": [{"kind": "platform", "value": -1}], + } + ], } + self.config_single_sine = {"timeseries": [config_sine]} + self.config_single_ecg = {"timeseries": [config_ecg]} + self.config_multiple = {"timeseries": [config_sine, config_ecg]} def _create_and_generate(self, config: dict, seed: int) -> List[TimeSeries]: gutentag = GutenTAG.from_dict(config, seed=seed) return gutentag.generate(return_timeseries=True) # type: ignore # shouldn't be None and if so raise the error later on - def _assert_df_equal(self, df1: pd.DataFrame, df2: pd.DataFrame, columns: Optional[List[str]] = None) -> None: + def _assert_df_equal( + self, df1: pd.DataFrame, df2: pd.DataFrame, columns: Optional[List[str]] = None + ) -> None: if columns is None: columns = np.union1d(df1.columns, df2.columns).tolist() for column in columns: