From 4362fa9abaab2692ec4490782d4bcb6976e5f5dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jerzy=20Kami=C5=84ski?= <86363785+jrzkaminski@users.noreply.github.com> Date: Sat, 27 Apr 2024 22:52:03 +0300 Subject: [PATCH 01/15] requirements update (#109) --- .../nodes/child_nodes/node_models/__init__.py | 0 pyproject.toml | 23 ++++++------- requirements.txt | 34 +++++++++++-------- 3 files changed, 31 insertions(+), 26 deletions(-) delete mode 100644 bamt/core/nodes/child_nodes/node_models/__init__.py diff --git a/bamt/core/nodes/child_nodes/node_models/__init__.py b/bamt/core/nodes/child_nodes/node_models/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/pyproject.toml b/pyproject.toml index fab78d1..8000b61 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "BAMT" -version = "1.2.01" +version = "2.0.0" description = "data modeling and analysis tool based on Bayesian networks" authors = ["Roman Netrogolov ", "Irina Deeva ", @@ -22,20 +22,19 @@ packages = [ ] [tool.poetry.dependencies] -python = ">=3.9,<3.11" -setuptools = "65.6.3" -numpy = ">=1.24.2" -matplotlib = "3.6.2" -pandas = "2.0.3" +python = "^3.10" +numpy = "^1.26.0" +matplotlib = "^3.8.0" +pandas = "^2.2.0" gmr = "1.6.2" -scikit-learn = "^1.2.0" -scipy = "^1.8.0" -pyvis = ">=0.2.1" +scikit-learn = "^1.4.2" +scipy = "^1.13.0" +pyvis = "^0.3.1" missingno = "^0.5.1" -pgmpy = "0.1.20" -thegolem = ">=0.3.3" +pgmpy = "^0.1.20" +thegolem = "^0.3.3" xgboost = ">=1.7.6" -catboost = ">=1.0.6" +catboost = ">=2.0.0" lightgbm = {version = ">=3.3.5", optional = true } [tool.poetry.extras] diff --git a/requirements.txt b/requirements.txt index 4ffdb37..374a8a4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,15 +1,21 @@ -numpy>=1.24.2 -matplotlib==3.6.2 -pandas>=2.0.0 +# Data Manipulation +numpy>=1.26.0 +pandas>=2.2.0 + +# Graph manipulation +networkx>=3.3 +thegolem>=0.4.0 + +# ML modeling frameworks +scikit-learn>=1.4.2 +scipy>=1.13.0 +catboost>=1.2.1 +xgboost>=2.0.0 + +# visualization +matplotlib>=3.8.0 +pyvis>=0.3.1 + +# TODO: exclude these libraries gmr==1.6.2 -scikit-learn>=1.2.0 -scipy>=1.9.3 -pyvis>=0.2.1 -pgmpy==0.1.20 -catboost>=1.0.6 -joblib>=1.1.1 -networkx>=3.1 -tqdm>=4.65.0 -thegolem>=0.3.3 -typing>=3.7.4.3 -xgboost>=1.7.6 +pgmpy==0.1.20 \ No newline at end of file From 826126277d8bc036d4f2d09f88d06f86ae098ba4 Mon Sep 17 00:00:00 2001 From: jrzkaminski Date: Fri, 5 Jul 2024 15:33:21 +0300 Subject: [PATCH 02/15] black autoformat yml --- .github/workflows/black.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 .github/workflows/black.yml diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml new file mode 100644 index 0000000..af032f6 --- /dev/null +++ b/.github/workflows/black.yml @@ -0,0 +1,11 @@ +name: black-action +on: [push, pull_request] +jobs: + linter_name: + name: Run black formatter + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: rickstaa/action-black@v1.3.3 + with: + black_args: "." 
\ No newline at end of file From 5c838d6f8f7b837a9212da90b2dbb9c06a0ad2d0 Mon Sep 17 00:00:00 2001 From: jrzkaminski Date: Fri, 5 Jul 2024 15:34:51 +0300 Subject: [PATCH 03/15] mkdocs migration --- .github/workflows/ci.yml | 29 +++++++++++++++ .readthedocs.yml | 39 -------------------- mkdocs.yml | 77 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 106 insertions(+), 39 deletions(-) create mode 100644 .github/workflows/ci.yml delete mode 100644 .readthedocs.yml create mode 100644 mkdocs.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..64749c8 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,29 @@ +name: ci +on: + push: + branches: + - master + - main +permissions: + contents: write +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Configure Git Credentials + run: | + git config user.name github-actions[bot] + git config user.email 41898282+github-actions[bot]@users.noreply.github.com + - uses: actions/setup-python@v5 + with: + python-version: 3.x + - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV + - uses: actions/cache@v4 + with: + key: mkdocs-material-${{ env.cache_id }} + path: .cache + restore-keys: | + mkdocs-material- + - run: pip install mkdocs-material 'mkdocstrings[python]' + - run: mkdocs gh-deploy --force \ No newline at end of file diff --git a/.readthedocs.yml b/.readthedocs.yml deleted file mode 100644 index 2efbd82..0000000 --- a/.readthedocs.yml +++ /dev/null @@ -1,39 +0,0 @@ -# Read the Docs configuration file for Sphinx projects -# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details - -# Required -version: 2 - -# Set the OS, Python version and other tools you might need -build: - os: ubuntu-22.04 - tools: - python: "3.11" - # You can also specify other tool versions: - # nodejs: "20" - # rust: "1.70" - # golang: "1.20" - -# Build documentation in the "docs/" directory with Sphinx -sphinx: - configuration: docs/source/conf.py - # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs - # builder: "dirhtml" - # Fail on all warnings to avoid broken references - # fail_on_warning: true - -# Optionally build your docs in additional formats such as PDF and ePub -# formats: -# - pdf -# - epub - -# Optional but recommended, declare the Python requirements required -# to build your documentation -# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html -# python: -# install: -# - requirements: docs/requirements.txt - -python: - install: - - requirements: other_requirements/readthedocs.txt diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..0363f66 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,77 @@ +site_name: BAMT +repo_name: aimclub/BAMT +repo_url: https://github.com/aimclub/BAMT +theme: + name: material + locale: en + features: + - announce.dismiss + - content.action.edit + - content.action.view + - content.code.annotate + - content.code.copy + # - content.code.select + # - content.footnote.tooltips + # - content.tabs.link + - content.tooltips + # - header.autohide + # - navigation.expand + - navigation.footer + - navigation.indexes + - navigation.instant + # - navigation.instant.prefetch + - navigation.instant.progress + # - navigation.prune + - navigation.sections + - navigation.tabs + # - navigation.tabs.sticky + - navigation.top + - navigation.tracking + - search.highlight + - search.share + - search.suggest + - toc.follow + # - toc.integrate + 
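  # The palette below defines a three-state color-scheme toggle
  # (system preference -> light -> dark), following the standard
  # Material for MkDocs palette-toggle pattern.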
palette: + - media: "(prefers-color-scheme)" + toggle: + icon: material/link + name: Switch to light mode + - media: "(prefers-color-scheme: light)" + scheme: default + primary: indigo + accent: indigo + toggle: + icon: material/toggle-switch + name: Switch to dark mode + - media: "(prefers-color-scheme: dark)" + scheme: slate + primary: black + accent: indigo + toggle: + icon: material/toggle-switch-off + name: Switch to system preference + font: + text: Roboto + code: Roboto Mono + +nav: + - Home: index.md + - Getting Started: getting-started.md + - Installation: installation.md + - API Reference: + - logger: reference/logger.md +#extra: +# social: +# - icon: fontawesome/brands/github +# link: https://github.com/myusername/myproject +plugins: + - search + - mkdocstrings +markdown_extensions: + - admonition + - codehilite + - footnotes + - meta + - toc: + permalink: True From 8f0802c36154a657545d4ded2cad0a4f93c03a81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jerzy=20Kami=C5=84ski?= <86363785+jrzkaminski@users.noreply.github.com> Date: Fri, 5 Jul 2024 15:51:50 +0300 Subject: [PATCH 04/15] 2.0.0 requirements and readme update (#118) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Integrational tests (#105) * Add files via upload 2 benchmarks for tests added * Add files via upload International test added * Update tests/test_Integrational.py Co-authored-by: Jerzy Kamiński <86363785+jrzkaminski@users.noreply.github.com> * Update test_Integrational.py * Add files via upload * Add files via upload --------- Co-authored-by: Jerzy Kamiński <86363785+jrzkaminski@users.noreply.github.com> * Add seed parameter to BaseNetwork sample function (#110) Allows to generate replicable samples * Delete poetry.lock, update gitignore (#111) * Delete poetry.lock * Update .gitignore * New README.md (#112) * Create README.md * Delete README.rst * Update README.md * Update requirements.txt * Update requirements.txt * Update requirements.txt * Update requirements.txt * Update bamtcodecov.yml * Update requirements.txt --------- Co-authored-by: Pavel <95717191+PabloKarpacho@users.noreply.github.com> Co-authored-by: anton-golubkov --- .github/workflows/bamtcodecov.yml | 2 +- .gitignore | 163 ++++- README.md | 160 ++++ README.rst | 248 ------- bamt/networks/base.py | 0 data/benchmark/auto_price.csv | 160 ++++ data/benchmark/new_thyroid.csv | 216 ++++++ poetry.lock | 1139 ----------------------------- tests/test_Integrational.py | 277 +++++++ 9 files changed, 976 insertions(+), 1389 deletions(-) create mode 100644 README.md delete mode 100644 README.rst create mode 100644 bamt/networks/base.py create mode 100644 data/benchmark/auto_price.csv create mode 100644 data/benchmark/new_thyroid.csv delete mode 100644 poetry.lock create mode 100644 tests/test_Integrational.py diff --git a/.github/workflows/bamtcodecov.yml b/.github/workflows/bamtcodecov.yml index d5b12a3..3c08a9b 100644 --- a/.github/workflows/bamtcodecov.yml +++ b/.github/workflows/bamtcodecov.yml @@ -10,7 +10,7 @@ jobs: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: - python-version: '3.9' + python-version: '3.11' - name: Update pip run: python -m pip install --upgrade pip - name: Install requirements diff --git a/.gitignore b/.gitignore index 8875bf3..e49349e 100644 --- a/.gitignore +++ b/.gitignore @@ -14,4 +14,165 @@ tutorials/bamt /temp.ipynb /bamt_house_price_example_feature_analysis.ipynb /bamt_house_price_example_data_sampling.ipynb -/tmp.ipynb \ No newline at end of file +/tmp.ipynb + +# 
Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +.idea/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..3268555 --- /dev/null +++ b/README.md @@ -0,0 +1,160 @@ +![BAMT framework logo](docs/images/BAMT_white_bg.png) + +# BAMT - Bayesian Analytical and Modelling Toolkit + +Repository of a data modeling and analysis tool based on Bayesian networks. 
+ +## Badges + +| team | ![ITMO](https://raw.githubusercontent.com/ITMO-NSS-team/open-source-ops/cd771018e80e9164f7b661bd2191061ab58f94de/badges/ITMO_badge.svg) ![NCCR](https://raw.githubusercontent.com/ITMO-NSS-team/open-source-ops/cd771018e80e9164f7b661bd2191061ab58f94de/badges/NCCR_badge.svg) | +|------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| package | ![pypi](https://badge.fury.io/py/bamt.svg) ![Supported Python Versions](https://img.shields.io/badge/python_3.9-passing-success) ![Supported Python Versions](https://img.shields.io/badge/python_3.10-passing-success) | +| tests | ![Build](https://github.com/ITMO-NSS-team/BAMT/actions/workflows/bamtcodecov.yml/badge.svg) ![coverage](https://codecov.io/github/aimclub/BAMT/branch/master/graph/badge.svg?token=fA4qsxGqTC) | +| docs | ![Documentation Status](https://readthedocs.org/projects/bamt/badge/?version=latest) | +| license | ![license](https://img.shields.io/github/license/ITMO-NSS-team/BAMT) | +| stats | ![downloads](https://static.pepy.tech/personalized-badge/bamt?period=total&units=international_system&left_color=grey&right_color=blue&left_text=downloads) ![downloads/month](https://static.pepy.tech/personalized-badge/bamt?period=month&units=international_system&left_color=grey&right_color=blue&left_text=downloads/month) ![downloads/week](https://static.pepy.tech/personalized-badge/bamt?period=week&units=international_system&left_color=grey&right_color=blue&left_text=downloads/week) | +| style | ![Black](https://img.shields.io/badge/code%20style-black-000000.svg) | + +## Introduction + +BAMT - Bayesian Analytical and Modelling Toolkit. This repository contains a data modeling and analysis tool based on Bayesian networks. It can be divided into two main parts - algorithms for constructing and training Bayesian networks on data and algorithms for applying Bayesian networks for filling gaps, generating synthetic data, assessing edge strength, etc. + +![bamt readme scheme](docs/images/bamt_readme_scheme.png) + +## Installation + +BAMT package is available via PyPi: + +```bash +pip install bamt +``` + +## BAMT Features + +The following algorithms for Bayesian Networks learning are implemented: + +- Building the structure of a Bayesian network based on expert knowledge by directly specifying the structure of the network. +- Building the structure of a Bayesian network on data using three algorithms - Hill Climbing, evolutionary, and PC (PC is currently under development). For Hill Climbing, the following score functions are implemented - MI, K2, BIC, AIC. The algorithms work on both discrete and mixed data. +- Learning the parameters of distributions in the nodes of the network based on Gaussian distribution and Mixture Gaussian distribution with automatic selection of the number of components. +- Non-parametric learning of distributions at nodes using classification and regression models. +- BigBraveBN - algorithm for structural learning of Bayesian networks with a large number of nodes. Tested on networks with up to 500 nodes. + +### Difference from existing implementations: + +- Algorithms work on mixed data. +- Structural learning implements score-functions for mixed data. +- Parametric learning implements the use of a mixture of Gaussian distributions to approximate continuous distributions. 
- Non-parametric learning of distributions with various user-specified regression and classification models.
- The algorithm for structural training of large Bayesian networks (> 10 nodes) is based on local training of small networks with their subsequent algorithmic connection.

![bn example gif](img/BN_gif.gif)

For example, in terms of data analysis and modeling using Bayesian networks, a pipeline has been implemented to generate synthetic data by sampling from Bayesian networks.

![synthetics generation](img/synth_gen.png)

## How to use

First, the necessary classes are imported from the library:

```python
from bamt.networks.hybrid_bn import HybridBN
```

Next, a network instance is created and training (structure and parameters) is performed:

```python
# use_mixture enables Gaussian-mixture approximation of continuous
# distributions; has_logit allows logit (classification-model) nodes
# for discrete children of continuous parents
bn = HybridBN(has_logit=False, use_mixture=True)
bn.add_edges(preprocessed_data)  # structure learning on encoded/discretized data
bn.fit_parameters(data)          # parameter learning on the original data
```
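Once the parameters are fitted, the network can be applied to the tasks listed above, for example generating synthetic data by sampling. A minimal sketch is shown below; the exact `sample` signature is an assumption here, with the `seed` argument taken from the "Add seed parameter to BaseNetwork sample function" change in this patch series:

```python
# Draw 1000 synthetic rows from the fitted network; `seed` is assumed
# to make the draw replicable, per the seed-parameter change above.
synthetic_data = bn.sample(1000, seed=42)
print(synthetic_data.head())
```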
The repository is available at [web-BAMT](https://github.com/aimclub/Web-BAMT). + +## Contacts + +If you have questions or suggestions, you can contact us at the following address: ideeva@itmo.ru (Irina Deeva) + +Our resources: + +- [Natural Systems Simulation Team](https://itmo-nss-team.github.io/) +- [NSS team Telegram channel](https://t.me/NSS_group) +- [NSS lab YouTube channel](https://www.youtube.com/@nsslab/videos) + +## Citation + +```bibtex +@misc{BAMT, + author={BAMT}, + title = {Repository experiments and data}, + year = {2021}, + publisher = {GitHub}, + journal = {GitHub repository}, + howpublished = {\url{https://github.com/ITMO-NSS-team/BAMT.git}}, + url = {https://github.com/ITMO-NSS-team/BAMT.git} +} + +@article{deeva2023advanced, + title={Advanced Approach for Distributions Parameters Learning in Bayesian Networks with Gaussian Mixture Models and Discriminative Models}, + author={Deeva, Irina and Bubnova, Anna and Kalyuzhnaya, Anna V}, + journal={Mathematics}, + volume={11}, + number={2}, + pages={343}, + year={2023}, +} +``` diff --git a/README.rst b/README.rst deleted file mode 100644 index 8f5b3ff..0000000 --- a/README.rst +++ /dev/null @@ -1,248 +0,0 @@ -.. image:: /docs/images/BAMT_white_bg.png - :align: center - :alt: BAMT framework logo - -.. start-badges -.. list-table:: - :stub-columns: 1 - - * - team - - | |ITMO| |NCCR| - * - package - - | |pypi| |py_9| |py_10| - * - tests - - | |Build| |coverage| - * - docs - - |docs| - * - license - - | |license| - * - stats - - | |downloads_stats| |downloads_monthly| |downloads_weekly| - * - style - - | |Black| - -Repository of a data modeling and analysis tool based on Bayesian networks - -BAMT - Bayesian Analytical and Modelling Toolkit. This repository contains a data modeling and analysis tool based on Bayesian networks. It can be divided into two main parts - algorithms for constructing and training Bayesian networks on data and algorithms for applying Bayesian networks for filling gaps, generating synthetic data, assessing edges strength e.t.c. - -.. image:: docs/images/bamt_readme_scheme.png - :target: docs/images/bamt_readme_scheme.png - :align: center - :alt: bamt readme scheme - -Installation -^^^^^^^^^^^^ - -BAMT package is available via PyPi: - -.. code-block:: bash - - pip install bamt - -BAMT Features -^^^^^^^^^^^^^ - -The following algorithms for Bayesian Networks learning are implemented: - - -* Building the structure of a Bayesian network based on expert knowledge by directly specifying the structure of the network; -* Building the structure of a Bayesian network on data using three algorithms - Hill Climbing, evolutionary and PC (PC is currently under development). For Hill Climbing, the following score functions are implemented - MI, K2, BIC, AIC. The algorithms work on both discrete and mixed data. -* Learning the parameters of distributions in the nodes of the network based on Gaussian distribution and Mixture Gaussian distribution with automatic selection of the number of components. -* Non-parametric learning of distributions at nodes using classification and regression models. -* BigBraveBN - algorithm for structural learning of Bayesian networks with a large number of nodes. Tested on networks with up to 500 nodes. 
- -Difference from existing implementations: - - -* Algorithms work on mixed data; -* Structural learning implements score-functions for mixed data; -* Parametric learning implements the use of a mixture of Gaussian distributions to approximate continuous distributions; -* Non-parametric learning of distributions with various user-specified regression and classification models; -* The algorithm for structural training of large Bayesian networks (> 10 nodes) is based on local training of small networks with their subsequent algorithmic connection. - -.. image:: img/BN_gif.gif - :target: img/BN_gif.gif - :align: center - :alt: bn example gif - -For example, in terms of data analysis and modeling using Bayesian networks, a pipeline has been implemented to generate synthetic data by sampling from Bayesian networks. - - - -.. image:: img/synth_gen.png - :target: img/synth_gen.png - :align: center - :height: 300px - :width: 600px - :alt: synthetics generation - - -How to use -^^^^^^^^^^ - -Then the necessary classes are imported from the library: - -.. code-block:: python - - from bamt.networks.hybrid_bn import HybridBN - -Next, a network instance is created and training (structure and parameters) is performed: - -.. code-block:: python - - bn = HybridBN(has_logit=False, use_mixture=True) - bn.add_edges(preprocessed_data) - bn.fit_parameters(data) - - - -Examples & Tutorials -^^^^^^^^^^^^^^^^^^^^^^ - -More examples can be found in `Documentation `__. - -Publications about BAMT -^^^^^^^^^^^^^^^^^^^^^^^ - -We have published several articles about BAMT: - -* `Advanced Approach for Distributions Parameters Learning in Bayesian Networks with Gaussian Mixture Models and Discriminative Models `__ (2023) -* `BigBraveBN: algorithm of structural learning for bayesian networks with a large number of nodes `__ (2022) -* `MIxBN: Library for learning Bayesian networks from mixed data `__ (2021) -* `Oil and Gas Reservoirs Parameters Analysis Using Mixed Learning of Bayesian Networks `__ (2021) -* `Bayesian Networks-based personal data synthesis `__ (2020) - - -Project structure -^^^^^^^^^^^^^^^^^ - -The latest stable version of the library is available in the master branch. - -It includes the following modules and direcotries: - -* `bamt `__ - directory with the framework code: - * Preprocessing - module for data preprocessing - * Networks - module for building and training Bayesian networks - * Nodes - module for nodes support of Bayesian networks - * Utilities - module for mathematical and graph utilities -* `data `__ - directory with data for experiments and tests -* `tests `__ - directory with unit and integration tests -* `tutorials `__ - directory with tutorials -* `docs `__ - directory with RTD documentation - -Preprocessing -============= - -Preprocessor module allows user to transform data according pipeline (similar to pipeline in scikit-learn). - -Networks -======== - -Three types of networks are implemented: - -* HybridBN - Bayesian network with mixed data -* DiscreteBN - Bayesian network with discrete data -* ContinuousBN - Bayesian network with continuous data - -They are inherited from the abstract class BaseNetwork. - -Nodes -===== - -Contains classes for nodes of Bayesian networks. - -Utilities -========= - -Utilities module contains mathematical and graph utilities to support the main functionality of the library. - - -Web-BAMT -^^^^^^^^ - -A web interface for BAMT is currently under development. 
-The repository is available at `web-BAMT `__ - -Contacts -^^^^^^^^ - -If you have questions or suggestions, you can contact us at the following address: ideeva@itmo.ru (Irina Deeva) - -Our resources: - -* `Natural Systems Simulation Team `__ -* `NSS team Telegram channel `__ -* `NSS lab YouTube channel `__ - - -Citation -^^^^^^^^ - -@misc{BAMT, - author={BAMT}, - title = {Repository experiments and data}, - year = {2021}, - publisher = {GitHub}, - journal = {GitHub repository}, - howpublished = {\url{https://github.com/ITMO-NSS-team/BAMT.git}}, - url = {https://github.com/ITMO-NSS-team/BAMT.git} -} - -@article{deeva2023advanced, - title={Advanced Approach for Distributions Parameters Learning in Bayesian Networks with Gaussian Mixture Models and Discriminative Models}, - author={Deeva, Irina and Bubnova, Anna and Kalyuzhnaya, Anna V}, - journal={Mathematics}, - volume={11}, - number={2}, - pages={343}, - year={2023}, - publisher={MDPI} -} - - - - -.. |docs| image:: https://readthedocs.org/projects/bamt/badge/?version=latest - :target: https://bamt.readthedocs.io/en/latest/?badge=latest - :alt: Documentation Status - -.. |ITMO| image:: https://raw.githubusercontent.com/ITMO-NSS-team/open-source-ops/cd771018e80e9164f7b661bd2191061ab58f94de/badges/ITMO_badge.svg - -.. |NCCR| image:: https://raw.githubusercontent.com/ITMO-NSS-team/open-source-ops/cd771018e80e9164f7b661bd2191061ab58f94de/badges/NCCR_badge.svg - -.. |pypi| image:: https://badge.fury.io/py/bamt.svg - :target: https://badge.fury.io/py/bamt - -.. |py_10| image:: https://img.shields.io/badge/python_3.10-passing-success - :alt: Supported Python Versions - :target: https://img.shields.io/badge/python_3.10-passing-success - -.. |py_8| image:: https://img.shields.io/badge/python_3.8-passing-success - :alt: Supported Python Versions - :target: https://img.shields.io/badge/python_3.8-passing-success - -.. |py_9| image:: https://img.shields.io/badge/python_3.9-passing-success - :alt: Supported Python Versions - :target: https://img.shields.io/badge/python_3.9-passing-success - -.. |license| image:: https://img.shields.io/github/license/ITMO-NSS-team/BAMT - :alt: Supported Python Versions - :target: https://github.com/ITMO-NSS-team/BAMT/blob/master/LICENCE - -.. |downloads_stats| image:: https://static.pepy.tech/personalized-badge/bamt?period=total&units=international_system&left_color=grey&right_color=blue&left_text=downloads - :target: https://pepy.tech/project/bamt - -.. |downloads_monthly| image:: https://static.pepy.tech/personalized-badge/bamt?period=month&units=international_system&left_color=grey&right_color=blue&left_text=downloads/month - :target: https://pepy.tech/project/bamt - -.. |downloads_weekly| image:: https://static.pepy.tech/personalized-badge/bamt?period=week&units=international_system&left_color=grey&right_color=blue&left_text=downloads/week - :target: https://pepy.tech/project/bamt - -.. |Build| image:: https://github.com/ITMO-NSS-team/BAMT/actions/workflows/bamtcodecov.yml/badge.svg - :target: https://github.com/ITMO-NSS-team/BAMT/actions/workflows/bamtcodecov.yml - -.. |coverage| image:: https://codecov.io/github/aimclub/BAMT/branch/master/graph/badge.svg?token=fA4qsxGqTC - :target: https://codecov.io/github/aimclub/BAMT - -.. |Black| image:: https://img.shields.io/badge/code%20style-black-000000.svg -.. 
_Black: https://github.com/psf/black diff --git a/bamt/networks/base.py b/bamt/networks/base.py new file mode 100644 index 0000000..e69de29 diff --git a/data/benchmark/auto_price.csv b/data/benchmark/auto_price.csv new file mode 100644 index 0000000..4e073ad --- /dev/null +++ b/data/benchmark/auto_price.csv @@ -0,0 +1,160 @@ +,symboling,normalized-losses,wheel-base,length,width,height,curb-weight,engine-size,bore,stroke,compression-ratio,horsepower,peak-rpm,city-mpg,highway-mpg,target +0,2.0,164.0,99.8000030517578,176.60000610351562,66.19999694824219,54.29999923706055,2337.0,109.0,3.190000057220459,3.400000095367432,10.0,102.0,5500.0,24.0,30.0,13950.0 +1,2.0,164.0,99.4000015258789,176.60000610351562,66.4000015258789,54.29999923706055,2824.0,136.0,3.190000057220459,3.400000095367432,8.0,115.0,5500.0,18.0,22.0,17450.0 +2,1.0,158.0,105.8000030517578,192.6999969482422,71.4000015258789,55.70000076293945,2844.0,136.0,3.190000057220459,3.400000095367432,8.5,110.0,5500.0,19.0,25.0,17710.0 +3,1.0,158.0,105.8000030517578,192.6999969482422,71.4000015258789,55.900001525878906,3086.0,131.0,3.130000114440918,3.400000095367432,8.300000190734863,140.0,5500.0,17.0,20.0,23875.0 +4,2.0,192.0,101.1999969482422,176.8000030517578,64.80000305175781,54.29999923706055,2395.0,108.0,3.5,2.799999952316284,8.800000190734863,101.0,5800.0,23.0,29.0,16430.0 +5,0.0,192.0,101.1999969482422,176.8000030517578,64.80000305175781,54.29999923706055,2395.0,108.0,3.5,2.799999952316284,8.800000190734863,101.0,5800.0,23.0,29.0,16925.0 +6,0.0,188.0,101.1999969482422,176.8000030517578,64.80000305175781,54.29999923706055,2710.0,164.0,3.309999942779541,3.190000057220459,9.0,121.0,4250.0,21.0,28.0,20970.0 +7,0.0,188.0,101.1999969482422,176.8000030517578,64.80000305175781,54.29999923706055,2765.0,164.0,3.309999942779541,3.190000057220459,9.0,121.0,4250.0,21.0,28.0,21105.0 +8,2.0,121.0,88.4000015258789,141.10000610351562,60.29999923706055,53.20000076293945,1488.0,61.0,2.9100000858306885,3.0299999713897705,9.5,48.0,5100.0,47.0,53.0,5151.0 +9,1.0,98.0,94.5,155.89999389648438,63.59999847412109,52.0,1874.0,90.0,3.0299999713897705,3.109999895095825,9.600000381469728,70.0,5400.0,38.0,43.0,6295.0 +10,0.0,81.0,94.5,158.8000030517578,63.59999847412109,52.0,1909.0,90.0,3.0299999713897705,3.109999895095825,9.600000381469728,70.0,5400.0,38.0,43.0,6575.0 +11,1.0,118.0,93.6999969482422,157.3000030517578,63.79999923706055,50.79999923706055,1876.0,90.0,2.970000028610229,3.2300000190734863,9.40999984741211,68.0,5500.0,37.0,41.0,5572.0 +12,1.0,118.0,93.6999969482422,157.3000030517578,63.79999923706055,50.79999923706055,1876.0,90.0,2.970000028610229,3.2300000190734863,9.399999618530272,68.0,5500.0,31.0,38.0,6377.0 +13,1.0,118.0,93.6999969482422,157.3000030517578,63.79999923706055,50.79999923706055,2128.0,98.0,3.0299999713897705,3.390000104904175,7.599999904632568,102.0,5500.0,24.0,30.0,7957.0 +14,1.0,148.0,93.6999969482422,157.3000030517578,63.79999923706055,50.59999847412109,1967.0,90.0,2.970000028610229,3.2300000190734863,9.399999618530272,68.0,5500.0,31.0,38.0,6229.0 +15,1.0,148.0,93.6999969482422,157.3000030517578,63.79999923706055,50.59999847412109,1989.0,90.0,2.970000028610229,3.2300000190734863,9.399999618530272,68.0,5500.0,31.0,38.0,6692.0 +16,1.0,148.0,93.6999969482422,157.3000030517578,63.79999923706055,50.59999847412109,1989.0,90.0,2.970000028610229,3.2300000190734863,9.399999618530272,68.0,5500.0,31.0,38.0,7609.0 
+17,-1.0,110.0,103.3000030517578,174.60000610351562,64.5999984741211,59.79999923706055,2535.0,122.0,3.3399999141693115,3.4600000381469727,8.5,88.0,5000.0,24.0,30.0,8921.0 +18,3.0,145.0,95.9000015258789,173.1999969482422,66.30000305175781,50.20000076293945,2811.0,156.0,3.5999999046325684,3.900000095367432,7.0,145.0,5000.0,19.0,24.0,12964.0 +19,2.0,137.0,86.5999984741211,144.60000610351562,63.900001525878906,50.79999923706055,1713.0,92.0,2.9100000858306885,3.4100000858306885,9.600000381469728,58.0,4800.0,49.0,54.0,6479.0 +20,2.0,137.0,86.5999984741211,144.60000610351562,63.900001525878906,50.79999923706055,1819.0,92.0,2.9100000858306885,3.4100000858306885,9.199999809265137,76.0,6000.0,31.0,38.0,6855.0 +21,1.0,101.0,93.6999969482422,150.0,64.0,52.59999847412109,1837.0,79.0,2.9100000858306885,3.069999933242798,10.100000381469728,60.0,5500.0,38.0,42.0,5399.0 +22,1.0,101.0,93.6999969482422,150.0,64.0,52.59999847412109,1940.0,92.0,2.9100000858306885,3.4100000858306885,9.199999809265137,76.0,6000.0,30.0,34.0,6529.0 +23,1.0,101.0,93.6999969482422,150.0,64.0,52.59999847412109,1956.0,92.0,2.9100000858306885,3.4100000858306885,9.199999809265137,76.0,6000.0,30.0,34.0,7129.0 +24,0.0,110.0,96.5,163.39999389648438,64.0,54.5,2010.0,92.0,2.9100000858306885,3.4100000858306885,9.199999809265137,76.0,6000.0,30.0,34.0,7295.0 +25,0.0,78.0,96.5,157.10000610351562,63.900001525878906,58.29999923706055,2024.0,92.0,2.9200000762939453,3.4100000858306885,9.199999809265137,76.0,6000.0,30.0,34.0,7295.0 +26,0.0,106.0,96.5,167.5,65.19999694824219,53.29999923706055,2236.0,110.0,3.150000095367432,3.5799999237060547,9.0,86.0,5800.0,27.0,33.0,7895.0 +27,0.0,106.0,96.5,167.5,65.19999694824219,53.29999923706055,2289.0,110.0,3.150000095367432,3.5799999237060547,9.0,86.0,5800.0,27.0,33.0,9095.0 +28,0.0,85.0,96.5,175.39999389648438,65.19999694824219,54.09999847412109,2304.0,110.0,3.150000095367432,3.5799999237060547,9.0,86.0,5800.0,27.0,33.0,8845.0 +29,0.0,85.0,96.5,175.39999389648438,62.5,54.09999847412109,2372.0,110.0,3.150000095367432,3.5799999237060547,9.0,86.0,5800.0,27.0,33.0,10295.0 +30,0.0,85.0,96.5,175.39999389648438,65.19999694824219,54.09999847412109,2465.0,110.0,3.150000095367432,3.5799999237060547,9.0,101.0,5800.0,24.0,28.0,12945.0 +31,1.0,107.0,96.5,169.10000610351562,66.0,51.0,2293.0,110.0,3.150000095367432,3.5799999237060547,9.100000381469728,100.0,5500.0,25.0,31.0,10345.0 +32,0.0,145.0,113.0,199.6000061035156,69.5999984741211,52.79999923706055,4066.0,258.0,3.630000114440918,4.170000076293945,8.100000381469727,176.0,4750.0,15.0,19.0,32250.0 +33,1.0,104.0,93.0999984741211,159.10000610351562,64.19999694824219,54.09999847412109,1890.0,91.0,3.0299999713897705,3.150000095367432,9.0,68.0,5000.0,30.0,31.0,5195.0 +34,1.0,104.0,93.0999984741211,159.10000610351562,64.19999694824219,54.09999847412109,1900.0,91.0,3.0299999713897705,3.150000095367432,9.0,68.0,5000.0,31.0,38.0,6095.0 +35,1.0,104.0,93.0999984741211,159.10000610351562,64.19999694824219,54.09999847412109,1905.0,91.0,3.0299999713897705,3.150000095367432,9.0,68.0,5000.0,31.0,38.0,6795.0 +36,1.0,113.0,93.0999984741211,166.8000030517578,64.19999694824219,54.09999847412109,1945.0,91.0,3.0299999713897705,3.150000095367432,9.0,68.0,5000.0,31.0,38.0,6695.0 +37,1.0,113.0,93.0999984741211,166.8000030517578,64.19999694824219,54.09999847412109,1950.0,91.0,3.0799999237060547,3.150000095367432,9.0,68.0,5000.0,31.0,38.0,7395.0 
+38,1.0,129.0,98.8000030517578,177.8000030517578,66.5,53.70000076293945,2385.0,122.0,3.390000104904175,3.390000104904175,8.600000381469727,84.0,4800.0,26.0,32.0,8845.0 +39,0.0,115.0,98.8000030517578,177.8000030517578,66.5,55.5,2410.0,122.0,3.390000104904175,3.390000104904175,8.600000381469727,84.0,4800.0,26.0,32.0,8495.0 +40,1.0,129.0,98.8000030517578,177.8000030517578,66.5,53.70000076293945,2385.0,122.0,3.390000104904175,3.390000104904175,8.600000381469727,84.0,4800.0,26.0,32.0,10595.0 +41,0.0,115.0,98.8000030517578,177.8000030517578,66.5,55.5,2410.0,122.0,3.390000104904175,3.390000104904175,8.600000381469727,84.0,4800.0,26.0,32.0,10245.0 +42,0.0,115.0,98.8000030517578,177.8000030517578,66.5,55.5,2425.0,122.0,3.390000104904175,3.390000104904175,8.600000381469727,84.0,4800.0,26.0,32.0,11245.0 +43,0.0,118.0,104.9000015258789,175.0,66.0999984741211,54.400001525878906,2670.0,140.0,3.759999990463257,3.1600000858306885,8.0,120.0,5000.0,19.0,27.0,18280.0 +44,-1.0,93.0,110.0,190.8999938964844,70.30000305175781,56.5,3515.0,183.0,3.5799999237060547,3.640000104904175,21.5,123.0,4350.0,22.0,25.0,25552.0 +45,-1.0,93.0,110.0,190.8999938964844,70.30000305175781,58.70000076293945,3750.0,183.0,3.5799999237060547,3.640000104904175,21.5,123.0,4350.0,22.0,25.0,28248.0 +46,0.0,93.0,106.6999969482422,187.5,70.30000305175781,54.900001525878906,3495.0,183.0,3.5799999237060547,3.640000104904175,21.5,123.0,4350.0,22.0,25.0,28176.0 +47,-1.0,93.0,115.5999984741211,202.6000061035156,71.69999694824219,56.29999923706055,3770.0,183.0,3.5799999237060547,3.640000104904175,21.5,123.0,4350.0,22.0,25.0,31600.0 +48,3.0,142.0,96.5999984741211,180.3000030517578,70.5,50.79999923706055,3685.0,234.0,3.4600000381469727,3.0999999046325684,8.300000190734863,155.0,4750.0,16.0,18.0,35056.0 +49,2.0,161.0,93.6999969482422,157.3000030517578,64.4000015258789,50.79999923706055,1918.0,92.0,2.970000028610229,3.2300000190734863,9.399999618530272,68.0,5500.0,37.0,41.0,5389.0 +50,2.0,161.0,93.6999969482422,157.3000030517578,64.4000015258789,50.79999923706055,1944.0,92.0,2.970000028610229,3.2300000190734863,9.399999618530272,68.0,5500.0,31.0,38.0,6189.0 +51,2.0,161.0,93.6999969482422,157.3000030517578,64.4000015258789,50.79999923706055,2004.0,92.0,2.970000028610229,3.2300000190734863,9.399999618530272,68.0,5500.0,31.0,38.0,6669.0 +52,1.0,161.0,93.0,157.3000030517578,63.79999923706055,50.79999923706055,2145.0,98.0,3.0299999713897705,3.390000104904175,7.599999904632568,102.0,5500.0,24.0,30.0,7689.0 +53,3.0,153.0,96.3000030517578,173.0,65.4000015258789,49.400001525878906,2370.0,110.0,3.1700000762939453,3.4600000381469727,7.5,116.0,5500.0,23.0,30.0,9959.0 +54,3.0,153.0,96.3000030517578,173.0,65.4000015258789,49.400001525878906,2328.0,122.0,3.3499999046325684,3.4600000381469727,8.5,88.0,5000.0,25.0,32.0,8499.0 +55,1.0,125.0,96.3000030517578,172.39999389648438,65.4000015258789,51.59999847412109,2365.0,122.0,3.3499999046325684,3.4600000381469727,8.5,88.0,5000.0,25.0,32.0,6989.0 +56,1.0,125.0,96.3000030517578,172.39999389648438,65.4000015258789,51.59999847412109,2405.0,122.0,3.3499999046325684,3.4600000381469727,8.5,88.0,5000.0,25.0,32.0,8189.0 +57,1.0,125.0,96.3000030517578,172.39999389648438,65.4000015258789,51.59999847412109,2403.0,110.0,3.1700000762939453,3.4600000381469727,7.5,116.0,5500.0,23.0,30.0,9279.0 +58,-1.0,137.0,96.3000030517578,172.39999389648438,65.4000015258789,51.59999847412109,2403.0,110.0,3.1700000762939453,3.4600000381469727,7.5,116.0,5500.0,23.0,30.0,9279.0 
+59,1.0,128.0,94.5,165.3000030517578,63.79999923706055,54.5,1889.0,97.0,3.150000095367432,3.289999961853028,9.399999618530272,69.0,5200.0,31.0,37.0,5499.0 +60,1.0,128.0,94.5,165.3000030517578,63.79999923706055,54.5,2017.0,103.0,2.990000009536743,3.470000028610229,21.899999618530277,55.0,4800.0,45.0,50.0,7099.0 +61,1.0,128.0,94.5,165.3000030517578,63.79999923706055,54.5,1918.0,97.0,3.150000095367432,3.289999961853028,9.399999618530272,69.0,5200.0,31.0,37.0,6649.0 +62,1.0,122.0,94.5,165.3000030517578,63.79999923706055,54.5,1938.0,97.0,3.150000095367432,3.289999961853028,9.399999618530272,69.0,5200.0,31.0,37.0,6849.0 +63,1.0,103.0,94.5,170.1999969482422,63.79999923706055,53.5,2024.0,97.0,3.150000095367432,3.289999961853028,9.399999618530272,69.0,5200.0,31.0,37.0,7349.0 +64,1.0,128.0,94.5,165.3000030517578,63.79999923706055,54.5,1951.0,97.0,3.150000095367432,3.289999961853028,9.399999618530272,69.0,5200.0,31.0,37.0,7299.0 +65,1.0,128.0,94.5,165.60000610351562,63.79999923706055,53.29999923706055,2028.0,97.0,3.150000095367432,3.289999961853028,9.399999618530272,69.0,5200.0,31.0,37.0,7799.0 +66,1.0,122.0,94.5,165.3000030517578,63.79999923706055,54.5,1971.0,97.0,3.150000095367432,3.289999961853028,9.399999618530272,69.0,5200.0,31.0,37.0,7499.0 +67,1.0,103.0,94.5,170.1999969482422,63.79999923706055,53.5,2037.0,97.0,3.150000095367432,3.289999961853028,9.399999618530272,69.0,5200.0,31.0,37.0,7999.0 +68,2.0,168.0,95.0999984741211,162.39999389648438,63.79999923706055,53.29999923706055,2008.0,97.0,3.150000095367432,3.289999961853028,9.399999618530272,69.0,5200.0,31.0,37.0,8249.0 +69,0.0,106.0,97.1999969482422,173.39999389648438,65.19999694824219,54.70000076293945,2324.0,120.0,3.3299999237060547,3.470000028610229,8.5,97.0,5200.0,27.0,34.0,8949.0 +70,0.0,106.0,97.1999969482422,173.39999389648438,65.19999694824219,54.70000076293945,2302.0,120.0,3.3299999237060547,3.470000028610229,8.5,97.0,5200.0,27.0,34.0,9549.0 +71,0.0,128.0,100.4000015258789,181.6999969482422,66.5,55.09999847412109,3095.0,181.0,3.430000066757202,3.2699999809265137,9.0,152.0,5200.0,17.0,22.0,13499.0 +72,0.0,108.0,100.4000015258789,184.6000061035156,66.5,56.09999847412109,3296.0,181.0,3.430000066757202,3.2699999809265137,9.0,152.0,5200.0,17.0,22.0,14399.0 +73,0.0,108.0,100.4000015258789,184.6000061035156,66.5,55.09999847412109,3060.0,181.0,3.430000066757202,3.2699999809265137,9.0,152.0,5200.0,19.0,25.0,13499.0 +74,3.0,194.0,91.3000030517578,170.6999969482422,67.9000015258789,49.70000076293945,3071.0,181.0,3.430000066757202,3.2699999809265137,9.0,160.0,5200.0,19.0,25.0,17199.0 +75,3.0,194.0,91.3000030517578,170.6999969482422,67.9000015258789,49.70000076293945,3139.0,181.0,3.430000066757202,3.2699999809265137,7.800000190734863,200.0,5200.0,17.0,23.0,19699.0 +76,1.0,231.0,99.1999969482422,178.5,67.9000015258789,49.70000076293945,3139.0,181.0,3.430000066757202,3.2699999809265137,9.0,160.0,5200.0,19.0,25.0,18399.0 +77,0.0,161.0,107.9000015258789,186.6999969482422,68.4000015258789,56.70000076293945,3020.0,120.0,3.4600000381469727,3.190000057220459,8.399999618530273,97.0,5000.0,19.0,24.0,11900.0 +78,0.0,161.0,107.9000015258789,186.6999969482422,68.4000015258789,56.70000076293945,3197.0,152.0,3.700000047683716,3.5199999809265137,21.0,95.0,4150.0,28.0,33.0,13200.0 +79,0.0,161.0,107.9000015258789,186.6999969482422,68.4000015258789,56.70000076293945,3075.0,120.0,3.4600000381469727,2.190000057220459,8.399999618530273,95.0,5000.0,19.0,24.0,15580.0 
+80,0.0,161.0,107.9000015258789,186.6999969482422,68.4000015258789,56.70000076293945,3252.0,152.0,3.700000047683716,3.5199999809265137,21.0,95.0,4150.0,28.0,33.0,16900.0 +81,0.0,161.0,107.9000015258789,186.6999969482422,68.4000015258789,56.70000076293945,3075.0,120.0,3.4600000381469727,3.190000057220459,8.399999618530273,97.0,5000.0,19.0,24.0,16630.0 +82,0.0,161.0,107.9000015258789,186.6999969482422,68.4000015258789,56.70000076293945,3252.0,152.0,3.700000047683716,3.5199999809265137,21.0,95.0,4150.0,28.0,33.0,17950.0 +83,0.0,161.0,108.0,186.6999969482422,68.30000305175781,56.0,3130.0,134.0,3.609999895095825,3.2100000381469727,7.0,142.0,5600.0,18.0,24.0,18150.0 +84,1.0,119.0,93.6999969482422,157.3000030517578,63.79999923706055,50.79999923706055,1918.0,90.0,2.970000028610229,3.2300000190734863,9.399999618530272,68.0,5500.0,37.0,41.0,5572.0 +85,1.0,119.0,93.6999969482422,157.3000030517578,63.79999923706055,50.79999923706055,2128.0,98.0,3.0299999713897705,3.390000104904175,7.599999904632568,102.0,5500.0,24.0,30.0,7957.0 +86,1.0,154.0,93.6999969482422,157.3000030517578,63.79999923706055,50.59999847412109,1967.0,90.0,2.970000028610229,3.2300000190734863,9.399999618530272,68.0,5500.0,31.0,38.0,6229.0 +87,1.0,154.0,93.6999969482422,167.3000030517578,63.79999923706055,50.79999923706055,1989.0,90.0,2.970000028610229,3.2300000190734863,9.399999618530272,68.0,5500.0,31.0,38.0,6692.0 +88,1.0,154.0,93.6999969482422,167.3000030517578,63.79999923706055,50.79999923706055,2191.0,98.0,2.970000028610229,3.2300000190734863,9.399999618530272,68.0,5500.0,31.0,38.0,7609.0 +89,-1.0,74.0,103.3000030517578,174.60000610351562,64.5999984741211,59.79999923706055,2535.0,122.0,3.3499999046325684,3.4600000381469727,8.5,88.0,5000.0,24.0,30.0,8921.0 +90,3.0,186.0,94.5,168.89999389648438,68.30000305175781,50.20000076293945,2778.0,151.0,3.940000057220459,3.109999895095825,9.5,143.0,5500.0,19.0,27.0,22018.0 +91,3.0,150.0,99.0999984741211,186.6000061035156,66.5,56.09999847412109,2658.0,121.0,3.539999961853028,3.069999933242798,9.3100004196167,110.0,5250.0,21.0,28.0,11850.0 +92,2.0,104.0,99.0999984741211,186.6000061035156,66.5,56.09999847412109,2695.0,121.0,3.539999961853028,3.069999933242798,9.300000190734863,110.0,5250.0,21.0,28.0,12170.0 +93,3.0,150.0,99.0999984741211,186.6000061035156,66.5,56.09999847412109,2707.0,121.0,2.539999961853028,2.069999933242798,9.300000190734863,110.0,5250.0,21.0,28.0,15040.0 +94,2.0,104.0,99.0999984741211,186.6000061035156,66.5,56.09999847412109,2758.0,121.0,3.539999961853028,3.069999933242798,9.300000190734863,110.0,5250.0,21.0,28.0,15510.0 +95,3.0,150.0,99.0999984741211,186.6000061035156,66.5,56.09999847412109,2808.0,121.0,3.539999961853028,3.069999933242798,9.0,160.0,5500.0,19.0,26.0,18150.0 +96,2.0,104.0,99.0999984741211,186.6000061035156,66.5,56.09999847412109,2847.0,121.0,3.539999961853028,3.069999933242798,9.0,160.0,5500.0,19.0,26.0,18620.0 +97,2.0,83.0,93.6999969482422,156.89999389648438,63.400001525878906,53.70000076293945,2050.0,97.0,3.619999885559082,2.359999895095825,9.0,69.0,4900.0,31.0,36.0,5118.0 +98,2.0,83.0,93.6999969482422,157.89999389648438,63.59999847412109,53.70000076293945,2120.0,108.0,3.619999885559082,2.640000104904175,8.699999809265137,73.0,4400.0,26.0,31.0,7053.0 +99,2.0,83.0,93.3000030517578,157.3000030517578,63.79999923706055,55.70000076293945,2240.0,108.0,3.619999885559082,2.640000104904175,8.699999809265137,73.0,4400.0,26.0,31.0,7603.0 
+100,0.0,102.0,97.1999969482422,172.0,65.4000015258789,52.5,2145.0,108.0,3.619999885559082,2.640000104904175,9.5,82.0,4800.0,32.0,37.0,7126.0 +101,0.0,102.0,97.1999969482422,172.0,65.4000015258789,52.5,2190.0,108.0,3.619999885559082,2.640000104904175,9.5,82.0,4400.0,28.0,33.0,7775.0 +102,0.0,102.0,97.1999969482422,172.0,65.4000015258789,52.5,2340.0,108.0,3.619999885559082,2.640000104904175,9.0,94.0,5200.0,26.0,32.0,9960.0 +103,0.0,102.0,97.0,172.0,65.4000015258789,54.29999923706055,2385.0,108.0,3.619999885559082,2.640000104904175,9.0,82.0,4800.0,24.0,25.0,9233.0 +104,0.0,102.0,97.0,172.0,65.4000015258789,54.29999923706055,2510.0,108.0,3.619999885559082,2.640000104904175,7.699999809265137,111.0,4800.0,24.0,29.0,11259.0 +105,0.0,89.0,97.0,173.5,65.4000015258789,53.0,2290.0,108.0,3.619999885559082,2.640000104904175,9.0,82.0,4800.0,28.0,32.0,7463.0 +106,0.0,89.0,97.0,173.5,65.4000015258789,53.0,2455.0,108.0,3.619999885559082,2.640000104904175,9.0,94.0,5200.0,25.0,31.0,10198.0 +107,0.0,85.0,96.9000015258789,173.60000610351562,65.4000015258789,54.900001525878906,2420.0,108.0,3.619999885559082,2.640000104904175,9.0,82.0,4800.0,23.0,29.0,8013.0 +108,0.0,85.0,96.9000015258789,173.60000610351562,65.4000015258789,54.900001525878906,2650.0,108.0,3.619999885559082,2.640000104904175,7.699999809265137,111.0,4800.0,23.0,23.0,11694.0 +109,1.0,87.0,95.6999969482422,158.6999969482422,63.59999847412109,54.5,1985.0,92.0,3.049999952316284,3.0299999713897705,9.0,62.0,4800.0,35.0,39.0,5348.0 +110,1.0,87.0,95.6999969482422,158.6999969482422,63.59999847412109,54.5,2040.0,92.0,3.049999952316284,3.0299999713897705,9.0,62.0,4800.0,31.0,38.0,6338.0 +111,1.0,74.0,95.6999969482422,158.6999969482422,63.59999847412109,54.5,2015.0,92.0,3.049999952316284,3.0299999713897705,9.0,62.0,4800.0,31.0,38.0,6488.0 +112,0.0,77.0,95.6999969482422,169.6999969482422,63.59999847412109,59.09999847412109,2280.0,92.0,3.049999952316284,3.0299999713897705,9.0,62.0,4800.0,31.0,37.0,6918.0 +113,0.0,81.0,95.6999969482422,169.6999969482422,63.59999847412109,59.09999847412109,2290.0,92.0,3.049999952316284,3.0299999713897705,9.0,62.0,4800.0,27.0,32.0,7898.0 +114,0.0,91.0,95.6999969482422,169.6999969482422,63.59999847412109,59.09999847412109,3110.0,92.0,3.049999952316284,3.0299999713897705,9.0,62.0,4800.0,27.0,32.0,8778.0 +115,0.0,91.0,95.6999969482422,166.3000030517578,64.4000015258789,53.0,2081.0,98.0,3.190000057220459,3.0299999713897705,9.0,70.0,4800.0,30.0,37.0,6938.0 +116,0.0,91.0,95.6999969482422,166.3000030517578,64.4000015258789,52.79999923706055,2109.0,98.0,3.190000057220459,3.0299999713897705,9.0,70.0,4800.0,30.0,37.0,7198.0 +117,0.0,91.0,95.6999969482422,166.3000030517578,64.4000015258789,53.0,2275.0,110.0,3.2699999809265137,3.3499999046325684,22.5,56.0,4500.0,34.0,36.0,7898.0 +118,0.0,91.0,95.6999969482422,166.3000030517578,64.4000015258789,52.79999923706055,2275.0,110.0,3.2699999809265137,3.3499999046325684,22.5,56.0,4500.0,38.0,47.0,7788.0 +119,0.0,91.0,95.6999969482422,166.3000030517578,64.4000015258789,53.0,2094.0,98.0,3.190000057220459,3.0299999713897705,9.0,70.0,4800.0,38.0,47.0,7738.0 +120,0.0,91.0,95.6999969482422,166.3000030517578,64.4000015258789,52.79999923706055,2122.0,98.0,3.190000057220459,3.0299999713897705,9.0,70.0,4800.0,28.0,34.0,8358.0 +121,0.0,91.0,95.6999969482422,166.3000030517578,64.4000015258789,52.79999923706055,2140.0,98.0,3.190000057220459,3.0299999713897705,9.0,70.0,4800.0,28.0,34.0,9258.0 
+122,1.0,168.0,94.5,168.6999969482422,64.0,52.59999847412109,2169.0,98.0,3.190000057220459,3.0299999713897705,9.0,70.0,4800.0,29.0,34.0,8058.0 +123,1.0,168.0,94.5,168.6999969482422,64.0,52.59999847412109,2204.0,98.0,3.190000057220459,3.0299999713897705,9.0,70.0,4800.0,29.0,34.0,8238.0 +124,1.0,168.0,94.5,168.6999969482422,64.0,52.59999847412109,2265.0,98.0,3.240000009536743,3.0799999237060547,9.399999618530272,112.0,6600.0,26.0,29.0,9298.0 +125,1.0,168.0,94.5,168.6999969482422,64.0,52.59999847412109,2300.0,98.0,3.240000009536743,3.0799999237060547,9.399999618530272,112.0,6600.0,26.0,29.0,9538.0 +126,2.0,134.0,98.4000015258789,176.1999969482422,65.5999984741211,52.0,2540.0,146.0,3.619999885559082,3.5,9.300000190734863,116.0,4800.0,24.0,30.0,8449.0 +127,2.0,134.0,98.4000015258789,176.1999969482422,65.5999984741211,52.0,2536.0,146.0,3.619999885559082,3.5,9.300000190734863,116.0,4800.0,24.0,30.0,9639.0 +128,2.0,134.0,98.4000015258789,176.1999969482422,65.5999984741211,52.0,2551.0,146.0,3.619999885559082,3.5,9.300000190734863,116.0,4800.0,24.0,30.0,9989.0 +129,2.0,134.0,98.4000015258789,176.1999969482422,65.5999984741211,52.0,2679.0,146.0,3.619999885559082,3.5,9.300000190734863,116.0,4800.0,24.0,30.0,11199.0 +130,2.0,134.0,98.4000015258789,176.1999969482422,65.5999984741211,52.0,2714.0,146.0,3.619999885559082,3.5,9.300000190734863,116.0,4800.0,24.0,30.0,11549.0 +131,2.0,134.0,98.4000015258789,176.1999969482422,65.5999984741211,53.0,2975.0,146.0,3.619999885559082,3.5,9.300000190734863,116.0,4800.0,24.0,30.0,17669.0 +132,-1.0,65.0,102.4000015258789,175.60000610351562,66.5,54.900001525878906,2326.0,122.0,3.309999942779541,3.539999961853028,8.699999809265137,92.0,4200.0,29.0,34.0,8948.0 +133,-1.0,65.0,102.4000015258789,175.60000610351562,66.5,54.900001525878906,2480.0,110.0,3.2699999809265137,3.3499999046325684,22.5,73.0,4500.0,30.0,33.0,10698.0 +134,-1.0,65.0,102.4000015258789,175.60000610351562,66.5,53.900001525878906,2414.0,122.0,3.309999942779541,3.539999961853028,8.699999809265137,92.0,4200.0,27.0,32.0,9988.0 +135,-1.0,65.0,102.4000015258789,175.60000610351562,66.5,54.900001525878906,2414.0,122.0,3.309999942779541,3.539999961853028,8.699999809265137,92.0,4200.0,27.0,32.0,10898.0 +136,-1.0,65.0,102.4000015258789,175.60000610351562,66.5,53.900001525878906,2458.0,122.0,3.309999942779541,3.539999961853028,8.699999809265137,92.0,4200.0,27.0,32.0,11248.0 +137,3.0,197.0,102.9000015258789,183.5,67.69999694824219,52.0,2976.0,171.0,3.2699999809265137,3.3499999046325684,9.300000190734863,161.0,5200.0,20.0,24.0,16558.0 +138,3.0,197.0,102.9000015258789,183.5,67.69999694824219,52.0,3016.0,171.0,3.2699999809265137,3.3499999046325684,9.300000190734863,161.0,5200.0,19.0,24.0,15998.0 +139,-1.0,90.0,104.5,187.8000030517578,66.5,54.09999847412109,3131.0,171.0,3.2699999809265137,3.3499999046325684,9.199999809265137,156.0,5200.0,20.0,24.0,15690.0 +140,2.0,122.0,97.3000030517578,171.6999969482422,65.5,55.70000076293945,2261.0,97.0,3.009999990463257,3.400000095367432,23.0,52.0,4800.0,37.0,46.0,7775.0 +141,2.0,122.0,97.3000030517578,171.6999969482422,65.5,55.70000076293945,2209.0,109.0,3.190000057220459,3.400000095367432,9.0,85.0,5250.0,27.0,34.0,7975.0 +142,2.0,94.0,97.3000030517578,171.6999969482422,65.5,55.70000076293945,2264.0,97.0,3.009999990463257,3.400000095367432,23.0,52.0,4800.0,37.0,46.0,7995.0 +143,2.0,94.0,97.3000030517578,171.6999969482422,65.5,55.70000076293945,2212.0,109.0,3.190000057220459,3.400000095367432,9.0,85.0,5250.0,27.0,34.0,8195.0 
+144,2.0,94.0,97.3000030517578,171.6999969482422,65.5,55.70000076293945,2275.0,109.0,3.190000057220459,3.400000095367432,9.0,85.0,5250.0,27.0,34.0,8495.0 +145,2.0,94.0,97.3000030517578,171.6999969482422,65.5,55.70000076293945,2319.0,97.0,3.009999990463257,3.400000095367432,23.0,68.0,4500.0,37.0,42.0,9495.0 +146,2.0,94.0,97.3000030517578,171.6999969482422,65.5,55.70000076293945,2300.0,109.0,3.190000057220459,3.400000095367432,10.0,100.0,5500.0,26.0,32.0,9995.0 +147,3.0,256.0,94.5,165.6999969482422,64.0,51.400001525878906,2221.0,109.0,3.190000057220459,3.400000095367432,8.5,90.0,5500.0,24.0,29.0,9980.0 +148,-2.0,103.0,104.3000030517578,188.8000030517578,67.19999694824219,56.20000076293945,2912.0,141.0,3.7799999713897705,3.150000095367432,9.5,114.0,5400.0,23.0,28.0,12940.0 +149,-1.0,74.0,104.3000030517578,188.8000030517578,67.19999694824219,57.5,3034.0,141.0,3.7799999713897705,3.150000095367432,9.5,114.0,5400.0,23.0,28.0,13415.0 +150,-2.0,103.0,104.3000030517578,188.8000030517578,67.19999694824219,56.20000076293945,2935.0,141.0,3.7799999713897705,3.150000095367432,9.5,114.0,5400.0,24.0,28.0,15985.0 +151,-1.0,74.0,104.3000030517578,188.8000030517578,67.19999694824219,57.5,3042.0,141.0,3.7799999713897705,3.150000095367432,9.5,114.0,5400.0,24.0,28.0,16515.0 +152,-2.0,103.0,104.3000030517578,188.8000030517578,67.19999694824219,56.20000076293945,3045.0,130.0,3.619999885559082,3.150000095367432,7.5,162.0,5100.0,17.0,22.0,18420.0 +153,-1.0,74.0,104.3000030517578,188.8000030517578,67.19999694824219,57.5,3157.0,130.0,3.619999885559082,3.150000095367432,7.5,162.0,5100.0,17.0,22.0,18950.0 +154,-1.0,95.0,109.0999984741211,188.8000030517578,68.9000015258789,55.5,2952.0,141.0,3.7799999713897705,3.150000095367432,9.5,114.0,5400.0,23.0,28.0,16845.0 +155,-1.0,95.0,109.0999984741211,188.8000030517578,68.80000305175781,55.5,3049.0,141.0,3.7799999713897705,3.150000095367432,8.699999809265137,160.0,5300.0,19.0,25.0,19045.0 +156,-1.0,95.0,109.0999984741211,188.8000030517578,68.9000015258789,55.5,3012.0,173.0,3.5799999237060547,2.869999885559082,8.800000190734863,134.0,5500.0,18.0,23.0,21485.0 +157,-1.0,95.0,109.0999984741211,188.8000030517578,68.9000015258789,55.5,3217.0,145.0,3.009999990463257,3.400000095367432,23.0,106.0,4800.0,26.0,27.0,22470.0 +158,-1.0,95.0,109.0999984741211,188.8000030517578,68.9000015258789,55.5,3062.0,141.0,3.7799999713897705,3.150000095367432,9.5,114.0,5400.0,19.0,25.0,22625.0 diff --git a/data/benchmark/new_thyroid.csv b/data/benchmark/new_thyroid.csv new file mode 100644 index 0000000..2c6a086 --- /dev/null +++ b/data/benchmark/new_thyroid.csv @@ -0,0 +1,216 @@ +,2,3,4,5,6,target +0,107.0,10.1,2.2,0.9,2.7,1 +1,113.0,9.9,3.1,2.0,5.9,1 +2,127.0,12.9,2.4,1.4,0.6,1 +3,109.0,5.3,1.6,1.4,1.5,1 +4,105.0,7.3,1.5,1.5,-0.1,1 +5,105.0,6.1,2.1,1.4,7.0,1 +6,110.0,10.4,1.6,1.6,2.7,1 +7,114.0,9.9,2.4,1.5,5.7,1 +8,106.0,9.4,2.2,1.5,0.0,1 +9,107.0,13.0,1.1,0.9,3.1,1 +10,106.0,4.2,1.2,1.6,1.4,1 +11,110.0,11.3,2.3,0.9,3.3,1 +12,116.0,9.2,2.7,1.0,4.2,1 +13,112.0,8.1,1.9,3.7,2.0,1 +14,122.0,9.7,1.6,0.9,2.2,1 +15,109.0,8.4,2.1,1.1,3.6,1 +16,111.0,8.4,1.5,0.8,1.2,1 +17,114.0,6.7,1.5,1.0,3.5,1 +18,119.0,10.6,2.1,1.3,1.1,1 +19,115.0,7.1,1.3,1.3,2.0,1 +20,101.0,7.8,1.2,1.0,1.7,1 +21,103.0,10.1,1.3,0.7,0.1,1 +22,109.0,10.4,1.9,0.4,-0.1,1 +23,102.0,7.6,1.8,2.0,2.5,1 +24,121.0,10.1,1.7,1.3,0.1,1 +25,100.0,6.1,2.4,1.8,3.8,1 +26,106.0,9.6,2.4,1.0,1.3,1 +27,116.0,10.1,2.2,1.6,0.8,1 +28,105.0,11.1,2.0,1.0,1.0,1 +29,110.0,10.4,1.8,1.0,2.3,1 +30,120.0,8.4,1.1,1.4,1.4,1 +31,116.0,11.1,2.0,1.2,2.3,1 
+32,110.0,7.8,1.9,2.1,6.4,1 +33,90.0,8.1,1.6,1.4,1.1,1 +34,117.0,12.2,1.9,1.2,3.9,1 +35,117.0,11.0,1.4,1.5,2.1,1 +36,113.0,9.0,2.0,1.8,1.6,1 +37,106.0,9.4,1.5,0.8,0.5,1 +38,130.0,9.5,1.7,0.4,3.2,1 +39,100.0,10.5,2.4,0.9,1.9,1 +40,121.0,10.1,2.4,0.8,3.0,1 +41,110.0,9.2,1.6,1.5,0.3,1 +42,129.0,11.9,2.7,1.2,3.5,1 +43,121.0,13.5,1.5,1.6,0.5,1 +44,123.0,8.1,2.3,1.0,5.1,1 +45,107.0,8.4,1.8,1.5,0.8,1 +46,109.0,10.0,1.3,1.8,4.3,1 +47,120.0,6.8,1.9,1.3,1.9,1 +48,100.0,9.5,2.5,1.3,-0.2,1 +49,118.0,8.1,1.9,1.5,13.7,1 +50,100.0,11.3,2.5,0.7,-0.3,1 +51,103.0,12.2,1.2,1.3,2.7,1 +52,115.0,8.1,1.7,0.6,2.2,1 +53,119.0,8.0,2.0,0.6,3.2,1 +54,106.0,9.4,1.7,0.9,3.1,1 +55,114.0,10.9,2.1,0.3,1.4,1 +56,93.0,8.9,1.5,0.8,2.7,1 +57,120.0,10.4,2.1,1.1,1.8,1 +58,106.0,11.3,1.8,0.9,1.0,1 +59,110.0,8.7,1.9,1.6,4.4,1 +60,103.0,8.1,1.4,0.5,3.8,1 +61,101.0,7.1,2.2,0.8,2.2,1 +62,115.0,10.4,1.8,1.6,2.0,1 +63,116.0,10.0,1.7,1.5,4.3,1 +64,117.0,9.2,1.9,1.5,6.8,1 +65,106.0,6.7,1.5,1.2,3.9,1 +66,118.0,10.5,2.1,0.7,3.5,1 +67,97.0,7.8,1.3,1.2,0.9,1 +68,113.0,11.1,1.7,0.8,2.3,1 +69,104.0,6.3,2.0,1.2,4.0,1 +70,96.0,9.4,1.5,1.0,3.1,1 +71,120.0,12.4,2.4,0.8,1.9,1 +72,133.0,9.7,2.9,0.8,1.9,1 +73,126.0,9.4,2.3,1.0,4.0,1 +74,113.0,8.5,1.8,0.8,0.5,1 +75,109.0,9.7,1.4,1.1,2.1,1 +76,119.0,12.9,1.5,1.3,3.6,1 +77,101.0,7.1,1.6,1.5,1.6,1 +78,108.0,10.4,2.1,1.3,2.4,1 +79,117.0,6.7,2.2,1.8,6.7,1 +80,115.0,15.3,2.3,2.0,2.0,1 +81,91.0,8.0,1.7,2.1,4.6,1 +82,103.0,8.5,1.8,1.9,1.1,1 +83,98.0,9.1,1.4,1.9,-0.3,1 +84,111.0,7.8,2.0,1.8,4.1,1 +85,107.0,13.0,1.5,2.8,1.7,1 +86,119.0,11.4,2.3,2.2,1.6,1 +87,122.0,11.8,2.7,1.7,2.3,1 +88,105.0,8.1,2.0,1.9,-0.5,1 +89,109.0,7.6,1.3,2.2,1.9,1 +90,105.0,9.5,1.8,1.6,3.6,1 +91,112.0,5.9,1.7,2.0,1.3,1 +92,112.0,9.5,2.0,1.2,0.7,1 +93,98.0,8.6,1.6,1.6,6.0,1 +94,109.0,12.4,2.3,1.7,0.8,1 +95,114.0,9.1,2.6,1.5,1.5,1 +96,114.0,11.1,2.4,2.0,-0.3,1 +97,110.0,8.4,1.4,1.0,1.9,1 +98,120.0,7.1,1.2,1.5,4.3,1 +99,108.0,10.9,1.2,1.9,1.0,1 +100,108.0,8.7,1.2,2.2,2.5,1 +101,116.0,11.9,1.8,1.9,1.5,1 +102,113.0,11.5,1.5,1.9,2.9,1 +103,105.0,7.0,1.5,2.7,4.3,1 +104,114.0,8.4,1.6,1.6,-0.2,1 +105,114.0,8.1,1.6,1.6,0.5,1 +106,105.0,11.1,1.1,0.8,1.2,1 +107,107.0,13.8,1.5,1.0,1.9,1 +108,116.0,11.5,1.8,1.4,5.4,1 +109,102.0,9.5,1.4,1.1,1.6,1 +110,116.0,16.1,0.9,1.3,1.5,1 +111,118.0,10.6,1.8,1.4,3.0,1 +112,109.0,8.9,1.7,1.0,0.9,1 +113,110.0,7.0,1.0,1.6,4.3,1 +114,104.0,9.6,1.1,1.3,0.8,1 +115,105.0,8.7,1.5,1.1,1.5,1 +116,102.0,8.5,1.2,1.3,1.4,1 +117,112.0,6.8,1.7,1.4,3.3,1 +118,111.0,8.5,1.6,1.1,3.9,1 +119,111.0,8.5,1.6,1.2,7.7,1 +120,103.0,7.3,1.0,0.7,0.5,1 +121,98.0,10.4,1.6,2.3,-0.7,1 +122,117.0,7.8,2.0,1.0,3.9,1 +123,111.0,9.1,1.7,1.2,4.1,1 +124,101.0,6.3,1.5,0.9,2.9,1 +125,106.0,8.9,0.7,1.0,2.3,1 +126,102.0,8.4,1.5,0.8,2.4,1 +127,115.0,10.6,0.8,2.1,4.6,1 +128,130.0,10.0,1.6,0.9,4.6,1 +129,101.0,6.7,1.3,1.0,5.7,1 +130,110.0,6.3,1.0,0.8,1.0,1 +131,103.0,9.5,2.9,1.4,-0.1,1 +132,113.0,7.8,2.0,1.1,3.0,1 +133,112.0,10.6,1.6,0.9,-0.1,1 +134,118.0,6.5,1.2,1.2,1.7,1 +135,109.0,9.2,1.8,1.1,4.4,1 +136,116.0,7.8,1.4,1.1,3.7,1 +137,127.0,7.7,1.8,1.9,6.4,1 +138,108.0,6.5,1.0,0.9,1.5,1 +139,108.0,7.1,1.3,1.6,2.2,1 +140,105.0,5.7,1.0,0.9,0.9,1 +141,98.0,5.7,0.4,1.3,2.8,1 +142,112.0,6.5,1.2,1.2,2.0,1 +143,118.0,12.2,1.5,1.0,2.3,1 +144,94.0,7.5,1.2,1.3,4.4,1 +145,126.0,10.4,1.7,1.2,3.5,1 +146,114.0,7.5,1.1,1.6,4.4,1 +147,111.0,11.9,2.3,0.9,3.8,1 +148,104.0,6.1,1.8,0.5,0.8,1 +149,102.0,6.6,1.2,1.4,1.3,1 +150,139.0,16.4,3.8,1.1,-0.2,2 +151,111.0,16.0,2.1,0.9,-0.1,2 +152,113.0,17.2,1.8,1.0,0.0,2 +153,65.0,25.3,5.8,1.3,0.2,2 +154,88.0,24.1,5.5,0.8,0.1,2 
+155,65.0,18.2,10.0,1.3,0.1,2 +156,134.0,16.4,4.8,0.6,0.1,2 +157,110.0,20.3,3.7,0.6,0.2,2 +158,67.0,23.3,7.4,1.8,-0.6,2 +159,95.0,11.1,2.7,1.6,-0.3,2 +160,89.0,14.3,4.1,0.5,0.2,2 +161,89.0,23.8,5.4,0.5,0.1,2 +162,88.0,12.9,2.7,0.1,0.2,2 +163,105.0,17.4,1.6,0.3,0.4,2 +164,89.0,20.1,7.3,1.1,-0.2,2 +165,99.0,13.0,3.6,0.7,-0.1,2 +166,80.0,23.0,10.0,0.9,-0.1,2 +167,89.0,21.8,7.1,0.7,-0.1,2 +168,99.0,13.0,3.1,0.5,-0.1,2 +169,68.0,14.7,7.8,0.6,-0.2,2 +170,97.0,14.2,3.6,1.5,0.3,2 +171,84.0,21.5,2.7,1.1,-0.6,2 +172,84.0,18.5,4.4,1.1,-0.3,2 +173,98.0,16.7,4.3,1.7,0.2,2 +174,94.0,20.5,1.8,1.4,-0.5,2 +175,99.0,17.5,1.9,1.4,0.3,2 +176,76.0,25.3,4.5,1.2,-0.1,2 +177,110.0,15.2,1.9,0.7,-0.2,2 +178,144.0,22.3,3.3,1.3,0.6,2 +179,105.0,12.0,3.3,1.1,0.0,2 +180,88.0,16.5,4.9,0.8,0.1,2 +181,97.0,15.1,1.8,1.2,-0.2,2 +182,106.0,13.4,3.0,1.1,0.0,2 +183,79.0,19.0,5.5,0.9,0.3,2 +184,92.0,11.1,2.0,0.7,-0.2,2 +185,125.0,2.3,0.9,16.5,9.5,3 +186,120.0,6.8,2.1,10.4,38.6,3 +187,108.0,3.5,0.6,1.7,1.4,3 +188,120.0,3.0,2.5,1.2,4.5,3 +189,119.0,3.8,1.1,23.0,5.7,3 +190,141.0,5.6,1.8,9.2,14.4,3 +191,129.0,1.5,0.6,12.5,2.9,3 +192,118.0,3.6,1.5,11.6,48.8,3 +193,120.0,1.9,0.7,18.5,24.0,3 +194,119.0,0.8,0.7,56.4,21.6,3 +195,123.0,5.6,1.1,13.7,56.3,3 +196,115.0,6.3,1.2,4.7,14.4,3 +197,126.0,0.5,0.2,12.2,8.8,3 +198,121.0,4.7,1.8,11.2,53.0,3 +199,131.0,2.7,0.8,9.9,4.7,3 +200,134.0,2.0,0.5,12.2,2.2,3 +201,141.0,2.5,1.3,8.5,7.5,3 +202,113.0,5.1,0.7,5.8,19.6,3 +203,136.0,1.4,0.3,32.6,8.4,3 +204,120.0,3.4,1.8,7.5,21.5,3 +205,125.0,3.7,1.1,8.5,25.9,3 +206,123.0,1.9,0.3,22.8,22.2,3 +207,112.0,2.6,0.7,41.0,19.0,3 +208,134.0,1.9,0.6,18.4,8.2,3 +209,119.0,5.1,1.1,7.0,40.8,3 +210,118.0,6.5,1.3,1.7,11.5,3 +211,139.0,4.2,0.7,4.3,6.3,3 +212,103.0,5.1,1.4,1.2,5.0,3 +213,97.0,4.7,1.1,2.1,12.6,3 +214,102.0,5.3,1.4,1.3,6.7,3 diff --git a/poetry.lock b/poetry.lock deleted file mode 100644 index 8ea5c7b..0000000 --- a/poetry.lock +++ /dev/null @@ -1,1139 +0,0 @@ -[[package]] -name = "appnope" -version = "0.1.3" -description = "Disable App Nap on macOS >= 10.9" -category = "main" -optional = false -python-versions = "*" - -[[package]] -name = "asttokens" -version = "2.2.1" -description = "Annotate AST trees with source code positions" -category = "main" -optional = false -python-versions = "*" - -[package.dependencies] -six = "*" - -[package.extras] -test = ["astroid", "pytest"] - -[[package]] -name = "attrs" -version = "22.2.0" -description = "Classes Without Boilerplate" -category = "dev" -optional = false -python-versions = ">=3.6" - -[package.extras] -cov = ["attrs", "coverage-enable-subprocess", "coverage[toml] (>=5.3)"] -dev = ["attrs"] -docs = ["furo", "sphinx", "myst-parser", "zope.interface", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier"] -tests = ["attrs", "zope.interface"] -tests-no-zope = ["hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist", "cloudpickle", "mypy (>=0.971,<0.990)", "pytest-mypy-plugins"] -tests_no_zope = ["hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist", "cloudpickle", "mypy (>=0.971,<0.990)", "pytest-mypy-plugins"] - -[[package]] -name = "backcall" -version = "0.2.0" -description = "Specifications for callback functions passed in to an API" -category = "main" -optional = false -python-versions = "*" - -[[package]] -name = "colorama" -version = "0.4.6" -description = "Cross-platform colored terminal text." 
-category = "main" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" - -[[package]] -name = "contourpy" -version = "1.0.6" -description = "Python library for calculating contours of 2D quadrilateral grids" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -numpy = ">=1.16" - -[package.extras] -bokeh = ["bokeh", "selenium"] -docs = ["docutils (<0.18)", "sphinx (<=5.2.0)", "sphinx-rtd-theme"] -test = ["pytest", "matplotlib", "pillow", "flake8", "isort"] -test-minimal = ["pytest"] -test-no-codebase = ["pytest", "matplotlib", "pillow"] - -[[package]] -name = "cycler" -version = "0.11.0" -description = "Composable style cycles" -category = "main" -optional = false -python-versions = ">=3.6" - -[[package]] -name = "decorator" -version = "5.1.1" -description = "Decorators for Humans" -category = "main" -optional = false -python-versions = ">=3.5" - -[[package]] -name = "executing" -version = "1.2.0" -description = "Get the currently executing AST node of a frame, and other information" -category = "main" -optional = false -python-versions = "*" - -[package.extras] -tests = ["asttokens", "pytest", "littleutils", "rich"] - -[[package]] -name = "fonttools" -version = "4.38.0" -description = "Tools to manipulate font files" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.extras] -all = ["fs (>=2.2.0,<3)", "lxml (>=4.0,<5)", "zopfli (>=0.1.4)", "lz4 (>=1.7.4.2)", "matplotlib", "sympy", "skia-pathops (>=0.5.0)", "uharfbuzz (>=0.23.0)", "brotlicffi (>=0.8.0)", "scipy", "brotli (>=1.0.1)", "munkres", "unicodedata2 (>=14.0.0)", "xattr"] -graphite = ["lz4 (>=1.7.4.2)"] -interpolatable = ["scipy", "munkres"] -lxml = ["lxml (>=4.0,<5)"] -pathops = ["skia-pathops (>=0.5.0)"] -plot = ["matplotlib"] -repacker = ["uharfbuzz (>=0.23.0)"] -symfont = ["sympy"] -type1 = ["xattr"] -ufo = ["fs (>=2.2.0,<3)"] -unicode = ["unicodedata2 (>=14.0.0)"] -woff = ["zopfli (>=0.1.4)", "brotlicffi (>=0.8.0)", "brotli (>=1.0.1)"] - -[[package]] -name = "future" -version = "0.18.2" -description = "Clean single-source support for Python 3 and 2" -category = "main" -optional = false -python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" - -[[package]] -name = "gmr" -version = "1.6.2" -description = "Gaussian Mixture Regression" -category = "main" -optional = false -python-versions = "*" - -[package.dependencies] -numpy = "*" -scipy = "*" - -[package.extras] -all = ["matplotlib", "scikit-learn", "svgpathtools"] -doc = ["pdoc3"] -test = ["nose", "coverage"] - -[[package]] -name = "iniconfig" -version = "1.1.1" -description = "iniconfig: brain-dead simple config-ini parsing" -category = "dev" -optional = false -python-versions = "*" - -[[package]] -name = "ipython" -version = "8.7.0" -description = "IPython: Productive Interactive Computing" -category = "main" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -appnope = {version = "*", markers = "sys_platform == \"darwin\""} -backcall = "*" -colorama = {version = "*", markers = "sys_platform == \"win32\""} -decorator = "*" -jedi = ">=0.16" -matplotlib-inline = "*" -pexpect = {version = ">4.3", markers = "sys_platform != \"win32\""} -pickleshare = "*" -prompt-toolkit = ">=3.0.11,<3.1.0" -pygments = ">=2.4.0" -stack-data = "*" -traitlets = ">=5" - -[package.extras] -all = ["black", "ipykernel", "setuptools (>=18.5)", "sphinx (>=1.3)", "sphinx-rtd-theme", "docrepr", "matplotlib", "stack-data", "pytest (<7)", "typing-extensions", "pytest (<7.1)", 
"pytest-asyncio", "testpath", "nbconvert", "nbformat", "ipywidgets", "notebook", "ipyparallel", "qtconsole", "curio", "matplotlib (!=3.2.0)", "numpy (>=1.20)", "pandas", "trio"] -black = ["black"] -doc = ["ipykernel", "setuptools (>=18.5)", "sphinx (>=1.3)", "sphinx-rtd-theme", "docrepr", "matplotlib", "stack-data", "pytest (<7)", "typing-extensions", "pytest (<7.1)", "pytest-asyncio", "testpath"] -kernel = ["ipykernel"] -nbconvert = ["nbconvert"] -nbformat = ["nbformat"] -notebook = ["ipywidgets", "notebook"] -parallel = ["ipyparallel"] -qtconsole = ["qtconsole"] -test = ["pytest (<7.1)", "pytest-asyncio", "testpath"] -test_extra = ["pytest (<7.1)", "pytest-asyncio", "testpath", "curio", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.20)", "pandas", "trio"] - -[[package]] -name = "jedi" -version = "0.18.2" -description = "An autocompletion tool for Python that can be used for text editors." -category = "main" -optional = false -python-versions = ">=3.6" - -[package.dependencies] -parso = ">=0.8.0,<0.9.0" - -[package.extras] -docs = ["Jinja2 (==2.11.3)", "MarkupSafe (==1.1.1)", "Pygments (==2.8.1)", "alabaster (==0.7.12)", "babel (==2.9.1)", "chardet (==4.0.0)", "commonmark (==0.8.1)", "docutils (==0.17.1)", "future (==0.18.2)", "idna (==2.10)", "imagesize (==1.2.0)", "mock (==1.0.1)", "packaging (==20.9)", "pyparsing (==2.4.7)", "pytz (==2021.1)", "readthedocs-sphinx-ext (==2.1.4)", "recommonmark (==0.5.0)", "requests (==2.25.1)", "six (==1.15.0)", "snowballstemmer (==2.1.0)", "sphinx-rtd-theme (==0.4.3)", "sphinx (==1.8.5)", "sphinxcontrib-serializinghtml (==1.1.4)", "sphinxcontrib-websupport (==1.2.4)", "urllib3 (==1.26.4)"] -qa = ["flake8 (==3.8.3)", "mypy (==0.782)"] -testing = ["Django (<3.1)", "attrs", "colorama", "docopt", "pytest (<7.0.0)"] - -[[package]] -name = "jinja2" -version = "3.1.2" -description = "A very fast and expressive template engine." -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -MarkupSafe = ">=2.0" - -[package.extras] -i18n = ["Babel (>=2.7)"] - -[[package]] -name = "joblib" -version = "1.2.0" -description = "Lightweight pipelining with Python functions" -category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "jsonpickle" -version = "3.0.1" -description = "Python library for serializing any arbitrary object graph into JSON" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.extras] -docs = ["sphinx", "jaraco.packaging (>=3.2)", "rst.linker (>=1.9)"] -testing = ["pytest (>=3.5,!=3.7.3)", "pytest-checkdocs (>=1.2.3)", "pytest-flake8 (>=1.1.1)", "pytest-black-multipy", "pytest-cov", "ecdsa", "feedparser", "gmpy2", "numpy", "pandas", "pymongo", "scikit-learn", "sqlalchemy"] -"testing.libs" = ["simplejson", "ujson"] - -[[package]] -name = "kiwisolver" -version = "1.4.4" -description = "A fast implementation of the Cassowary constraint solver" -category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "markupsafe" -version = "2.1.1" -description = "Safely add untrusted strings to HTML/XML markup." 
-category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "matplotlib" -version = "3.6.2" -description = "Python plotting package" -category = "main" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -contourpy = ">=1.0.1" -cycler = ">=0.10" -fonttools = ">=4.22.0" -kiwisolver = ">=1.0.1" -numpy = ">=1.19" -packaging = ">=20.0" -pillow = ">=6.2.0" -pyparsing = ">=2.2.1" -python-dateutil = ">=2.7" -setuptools_scm = ">=7" - -[[package]] -name = "matplotlib-inline" -version = "0.1.6" -description = "Inline Matplotlib backend for Jupyter" -category = "main" -optional = false -python-versions = ">=3.5" - -[package.dependencies] -traitlets = "*" - -[[package]] -name = "missingno" -version = "0.5.1" -description = "Missing data visualization module for Python." -category = "main" -optional = false -python-versions = "*" - -[package.dependencies] -matplotlib = "*" -numpy = "*" -scipy = "*" -seaborn = "*" - -[package.extras] -tests = ["pytest", "pytest-mpl"] - -[[package]] -name = "networkx" -version = "2.8.8" -description = "Python package for creating and manipulating graphs and networks" -category = "main" -optional = false -python-versions = ">=3.8" - -[package.extras] -default = ["numpy (>=1.19)", "scipy (>=1.8)", "matplotlib (>=3.4)", "pandas (>=1.3)"] -developer = ["pre-commit (>=2.20)", "mypy (>=0.982)"] -doc = ["sphinx (>=5.2)", "pydata-sphinx-theme (>=0.11)", "sphinx-gallery (>=0.11)", "numpydoc (>=1.5)", "pillow (>=9.2)", "nb2plots (>=0.6)", "texext (>=0.6.6)"] -extra = ["lxml (>=4.6)", "pygraphviz (>=1.9)", "pydot (>=1.4.2)", "sympy (>=1.10)"] -test = ["pytest (>=7.2)", "pytest-cov (>=4.0)", "codecov (>=2.1)"] - -[[package]] -name = "numpy" -version = "1.24.0" -description = "Fundamental package for array computing in Python" -category = "main" -optional = false -python-versions = ">=3.8" - -[[package]] -name = "nvidia-cublas-cu11" -version = "11.10.3.66" -description = "CUBLAS native runtime libraries" -category = "main" -optional = false -python-versions = ">=3" - -[[package]] -name = "nvidia-cuda-nvrtc-cu11" -version = "11.7.99" -description = "NVRTC native runtime libraries" -category = "main" -optional = false -python-versions = ">=3" - -[[package]] -name = "nvidia-cuda-runtime-cu11" -version = "11.7.99" -description = "CUDA Runtime native Libraries" -category = "main" -optional = false -python-versions = ">=3" - -[[package]] -name = "nvidia-cudnn-cu11" -version = "8.5.0.96" -description = "cuDNN runtime libraries" -category = "main" -optional = false -python-versions = ">=3" - -[[package]] -name = "opt-einsum" -version = "3.3.0" -description = "Optimizing numpys einsum function" -category = "main" -optional = false -python-versions = ">=3.5" - -[package.dependencies] -numpy = ">=1.7" - -[package.extras] -docs = ["sphinx (==1.2.3)", "sphinxcontrib-napoleon", "sphinx-rtd-theme", "numpydoc"] -tests = ["pytest", "pytest-cov", "pytest-pep8"] - -[[package]] -name = "packaging" -version = "22.0" -description = "Core utilities for Python packages" -category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "pandas" -version = "1.5.2" -description = "Powerful data structures for data analysis, time series, and statistics" -category = "main" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -numpy = [ - {version = ">=1.20.3", markers = "python_version < \"3.10\""}, - {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, -] -python-dateutil = ">=2.8.1" -pytz = ">=2020.1" - 
-[package.extras] -test = ["hypothesis (>=5.5.3)", "pytest (>=6.0)", "pytest-xdist (>=1.31)"] - -[[package]] -name = "parso" -version = "0.8.3" -description = "A Python Parser" -category = "main" -optional = false -python-versions = ">=3.6" - -[package.extras] -qa = ["flake8 (==3.8.3)", "mypy (==0.782)"] -testing = ["docopt", "pytest (<6.0.0)"] - -[[package]] -name = "patsy" -version = "0.5.3" -description = "A Python package for describing statistical models and for building design matrices." -category = "main" -optional = false -python-versions = "*" - -[package.dependencies] -numpy = ">=1.4" -six = "*" - -[package.extras] -test = ["pytest", "pytest-cov", "scipy"] - -[[package]] -name = "pexpect" -version = "4.8.0" -description = "Pexpect allows easy control of interactive console applications." -category = "main" -optional = false -python-versions = "*" - -[package.dependencies] -ptyprocess = ">=0.5" - -[[package]] -name = "pgmpy" -version = "0.1.20" -description = "A library for Probabilistic Graphical Models" -category = "main" -optional = false -python-versions = "*" - -[package.dependencies] -joblib = "*" -networkx = "*" -numpy = "*" -opt-einsum = "*" -pandas = "*" -pyparsing = "*" -scikit-learn = "*" -scipy = "*" -statsmodels = "*" -torch = "*" -tqdm = "*" - -[package.extras] -all = ["networkx", "numpy", "scipy", "scikit-learn", "pandas", "pyparsing", "torch", "statsmodels", "tqdm", "joblib", "opt-einsum", "xdoctest", "pytest", "pytest-cov", "coverage", "codecov", "mock", "black", "daft"] -tests = ["xdoctest", "pytest", "pytest-cov", "coverage", "codecov", "mock", "black"] - -[[package]] -name = "pickleshare" -version = "0.7.5" -description = "Tiny 'shelve'-like database with concurrency support" -category = "main" -optional = false -python-versions = "*" - -[[package]] -name = "pillow" -version = "9.3.0" -description = "Python Imaging Library (Fork)" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.extras] -docs = ["furo", "olefile", "sphinx (>=2.4)", "sphinx-copybutton", "sphinx-issues (>=3.0.1)", "sphinx-removed-in", "sphinxext-opengraph"] -tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] - -[[package]] -name = "pluggy" -version = "1.0.0" -description = "plugin and hook calling mechanisms for python" -category = "dev" -optional = false -python-versions = ">=3.6" - -[package.extras] -dev = ["pre-commit", "tox"] -testing = ["pytest", "pytest-benchmark"] - -[[package]] -name = "pomegranate" -version = "0.14.8" -description = "Pomegranate is a graphical models library for Python, implemented in Cython for speed." 
-category = "main" -optional = false -python-versions = "*" - -[package.dependencies] -joblib = ">=0.9.0b4" -networkx = ">=2.4" -numpy = ">=1.20.0" -pyyaml = "*" -scipy = ">=0.17.0" - -[package.extras] -gpu = ["cupy"] -plotting = ["pygraphviz", "matplotlib"] - -[[package]] -name = "prompt-toolkit" -version = "3.0.36" -description = "Library for building powerful interactive command lines in Python" -category = "main" -optional = false -python-versions = ">=3.6.2" - -[package.dependencies] -wcwidth = "*" - -[[package]] -name = "ptyprocess" -version = "0.7.0" -description = "Run a subprocess in a pseudo terminal" -category = "main" -optional = false -python-versions = "*" - -[[package]] -name = "pure-eval" -version = "0.2.2" -description = "Safely evaluate AST nodes without side effects" -category = "main" -optional = false -python-versions = "*" - -[package.extras] -tests = ["pytest"] - -[[package]] -name = "py" -version = "1.11.0" -description = "library with cross-python path, ini-parsing, io, code, log facilities" -category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" - -[[package]] -name = "pygments" -version = "2.13.0" -description = "Pygments is a syntax highlighting package written in Python." -category = "main" -optional = false -python-versions = ">=3.6" - -[package.extras] -plugins = ["importlib-metadata"] - -[[package]] -name = "pyitlib" -version = "0.2.2" -description = "A library of information-theoretic methods" -category = "main" -optional = false -python-versions = "*" - -[package.dependencies] -future = ">=0.16.0" -pandas = ">=0.20.2numpy" -scikit-learn = ">=0.16.0" -scipy = ">=1.0.1" - -[[package]] -name = "pyparsing" -version = "3.0.9" -description = "pyparsing module - Classes and methods to define and execute parsing grammars" -category = "main" -optional = false -python-versions = ">=3.6.8" - -[package.extras] -diagrams = ["railroad-diagrams", "jinja2"] - -[[package]] -name = "pytest" -version = "7.1.3" -description = "pytest: simple powerful testing with Python" -category = "dev" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -attrs = ">=19.2.0" -colorama = {version = "*", markers = "sys_platform == \"win32\""} -iniconfig = "*" -packaging = "*" -pluggy = ">=0.12,<2.0" -py = ">=1.8.2" -tomli = ">=1.0.0" - -[package.extras] -testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"] - -[[package]] -name = "python-dateutil" -version = "2.8.2" -description = "Extensions to the standard Python datetime module" -category = "main" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" - -[package.dependencies] -six = ">=1.5" - -[[package]] -name = "pytz" -version = "2022.7" -description = "World timezone definitions, modern and historical" -category = "main" -optional = false -python-versions = "*" - -[[package]] -name = "pyvis" -version = "0.3.1" -description = "A Python network graph visualization library" -category = "main" -optional = false -python-versions = ">3.6" - -[package.dependencies] -ipython = ">=5.3.0" -jinja2 = ">=2.9.6" -jsonpickle = ">=1.4.1" -networkx = ">=1.11" - -[[package]] -name = "pyyaml" -version = "6.0" -description = "YAML parser and emitter for Python" -category = "main" -optional = false -python-versions = ">=3.6" - -[[package]] -name = "scikit-learn" -version = "1.2.0" -description = "A set of python modules for machine learning and data mining" -category = "main" -optional = false -python-versions = 
">=3.8" - -[package.dependencies] -joblib = ">=1.1.1" -numpy = ">=1.17.3" -scipy = ">=1.3.2" -threadpoolctl = ">=2.0.0" - -[package.extras] -benchmark = ["matplotlib (>=3.1.3)", "pandas (>=1.0.5)", "memory-profiler (>=0.57.0)"] -docs = ["matplotlib (>=3.1.3)", "scikit-image (>=0.16.2)", "pandas (>=1.0.5)", "seaborn (>=0.9.0)", "memory-profiler (>=0.57.0)", "sphinx (>=4.0.1)", "sphinx-gallery (>=0.7.0)", "numpydoc (>=1.2.0)", "Pillow (>=7.1.2)", "pooch (>=1.6.0)", "sphinx-prompt (>=1.3.0)", "sphinxext-opengraph (>=0.4.2)", "plotly (>=5.10.0)"] -examples = ["matplotlib (>=3.1.3)", "scikit-image (>=0.16.2)", "pandas (>=1.0.5)", "seaborn (>=0.9.0)", "pooch (>=1.6.0)", "plotly (>=5.10.0)"] -tests = ["matplotlib (>=3.1.3)", "scikit-image (>=0.16.2)", "pandas (>=1.0.5)", "pytest (>=5.3.1)", "pytest-cov (>=2.9.0)", "flake8 (>=3.8.2)", "black (>=22.3.0)", "mypy (>=0.961)", "pyamg (>=4.0.0)", "numpydoc (>=1.2.0)", "pooch (>=1.6.0)"] - -[[package]] -name = "scipy" -version = "1.9.3" -description = "Fundamental algorithms for scientific computing in Python" -category = "main" -optional = false -python-versions = ">=3.8" - -[package.dependencies] -numpy = ">=1.18.5,<1.26.0" - -[package.extras] -test = ["pytest", "pytest-cov", "pytest-xdist", "asv", "mpmath", "gmpy2", "threadpoolctl", "scikit-umfpack"] -doc = ["sphinx (!=4.1.0)", "pydata-sphinx-theme (==0.9.0)", "sphinx-panels (>=0.5.2)", "matplotlib (>2)", "numpydoc", "sphinx-tabs"] -dev = ["mypy", "typing-extensions", "pycodestyle", "flake8"] - -[[package]] -name = "seaborn" -version = "0.12.1" -description = "Statistical data visualization" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -matplotlib = ">=3.1,<3.6.1 || >3.6.1" -numpy = ">=1.17" -pandas = ">=0.25" - -[package.extras] -dev = ["pytest", "pytest-cov", "pytest-xdist", "flake8", "mypy", "pandas-stubs", "pre-commit"] -docs = ["numpydoc", "nbconvert", "ipykernel", "sphinx-copybutton", "sphinx-issues", "sphinx-design", "pyyaml", "pydata_sphinx_theme (==0.10.0rc2)"] -stats = ["scipy (>=1.3)", "statsmodels (>=0.10)"] - -[[package]] -name = "setuptools-scm" -version = "7.1.0" -description = "the blessed package to manage your versions by scm tags" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -packaging = ">=20.0" -tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} -typing-extensions = "*" - -[package.extras] -test = ["pytest (>=6.2)", "virtualenv (>20)"] -toml = ["setuptools (>=42)"] - -[[package]] -name = "six" -version = "1.16.0" -description = "Python 2 and 3 compatibility utilities" -category = "main" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" - -[[package]] -name = "stack-data" -version = "0.6.2" -description = "Extract data from python stack frames and tracebacks for informative displays" -category = "main" -optional = false -python-versions = "*" - -[package.dependencies] -asttokens = ">=2.1.0" -executing = ">=1.2.0" -pure-eval = "*" - -[package.extras] -tests = ["pytest", "typeguard", "pygments", "littleutils", "cython"] - -[[package]] -name = "statsmodels" -version = "0.13.2" -description = "Statistical computations and models for Python" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -numpy = ">=1.17" -packaging = ">=21.3" -pandas = ">=0.25" -patsy = ">=0.5.2" -scipy = ">=1.3" - -[package.extras] -build = ["cython (>=0.29.26)"] -develop = ["cython (>=0.29.26)"] -docs = ["sphinx", "nbconvert", "jupyter-client", 
"ipykernel", "matplotlib", "nbformat", "numpydoc", "pandas-datareader"] - -[[package]] -name = "statsmodels" -version = "0.13.3" -description = "Statistical computations and models for Python" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -numpy = [ - {version = ">=1.17", markers = "python_version != \"3.10\" or platform_system != \"Windows\" or platform_python_implementation == \"PyPy\""}, - {version = ">=1.22.3", markers = "python_version == \"3.10\" and platform_system == \"Windows\" and platform_python_implementation != \"PyPy\""}, -] -packaging = ">=21.3" -pandas = ">=0.25" -patsy = ">=0.5.2" -scipy = {version = ">=1.3", markers = "(python_version > \"3.7\" or platform_system != \"Windows\" or platform_machine != \"x86\") and python_version < \"3.12\""} - -[package.extras] -build = ["cython (>=0.29.32)"] -develop = ["cython (>=0.29.32)", "cython (>=0.29.32,<3.0.0)", "setuptools_scm[toml] (>=7.0.0,<7.1.0)", "oldest-supported-numpy (>=2022.4.18)", "matplotlib (>=3)", "colorama", "joblib", "jinja2", "pytest (>=7.0.1,<7.1.0)", "pytest-randomly", "pytest-xdist", "flake8", "isort", "pywinpty"] -docs = ["sphinx", "nbconvert", "jupyter-client", "ipykernel", "matplotlib", "nbformat", "numpydoc", "pandas-datareader"] - -[[package]] -name = "statsmodels" -version = "0.13.4" -description = "Statistical computations and models for Python" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -numpy = [ - {version = ">=1.17", markers = "python_version != \"3.10\" or platform_system != \"Windows\" or platform_python_implementation == \"PyPy\""}, - {version = ">=1.22.3", markers = "python_version == \"3.10\" and platform_system == \"Windows\" and platform_python_implementation != \"PyPy\""}, -] -packaging = ">=21.3" -pandas = ">=0.25" -patsy = ">=0.5.2" -scipy = {version = ">=1.3", markers = "(python_version > \"3.9\" or platform_system != \"Windows\" or platform_machine != \"x86\") and python_version < \"3.12\""} - -[package.extras] -build = ["cython (>=0.29.32)"] -develop = ["cython (>=0.29.32)", "cython (>=0.29.32,<3.0.0)", "setuptools_scm[toml] (>=7.0.0,<7.1.0)", "oldest-supported-numpy (>=2022.4.18)", "matplotlib (>=3)", "colorama", "joblib", "jinja2", "pytest (>=7.0.1,<7.1.0)", "pytest-randomly", "pytest-xdist", "flake8", "isort", "pywinpty"] -docs = ["sphinx", "nbconvert", "jupyter-client", "ipykernel", "matplotlib", "nbformat", "numpydoc", "pandas-datareader"] - -[[package]] -name = "statsmodels" -version = "0.13.5" -description = "Statistical computations and models for Python" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -numpy = [ - {version = ">=1.17", markers = "python_version != \"3.10\" or platform_system != \"Windows\" or platform_python_implementation == \"PyPy\""}, - {version = ">=1.22.3", markers = "python_version == \"3.10\" and platform_system == \"Windows\" and platform_python_implementation != \"PyPy\""}, -] -packaging = ">=21.3" -pandas = ">=0.25" -patsy = ">=0.5.2" -scipy = {version = ">=1.3", markers = "(python_version > \"3.9\" or platform_system != \"Windows\" or platform_machine != \"x86\") and python_version < \"3.12\""} - -[package.extras] -build = ["cython (>=0.29.32)"] -develop = ["cython (>=0.29.32)", "cython (>=0.29.32,<3.0.0)", "setuptools-scm[toml] (>=7.0.0,<7.1.0)", "oldest-supported-numpy (>=2022.4.18)", "matplotlib (>=3)", "colorama", "joblib", "jinja2", "pytest (>=7.0.1,<7.1.0)", "pytest-randomly", "pytest-xdist", "flake8", "isort", 
"pywinpty"] -docs = ["sphinx", "nbconvert", "jupyter-client", "ipykernel", "matplotlib", "nbformat", "numpydoc", "pandas-datareader"] - -[[package]] -name = "threadpoolctl" -version = "3.1.0" -description = "threadpoolctl" -category = "main" -optional = false -python-versions = ">=3.6" - -[[package]] -name = "tomli" -version = "2.0.1" -description = "A lil' TOML parser" -category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "torch" -version = "1.13.1" -description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" -category = "main" -optional = false -python-versions = ">=3.7.0" - -[package.dependencies] -nvidia-cublas-cu11 = {version = "11.10.3.66", markers = "platform_system == \"Linux\""} -nvidia-cuda-nvrtc-cu11 = {version = "11.7.99", markers = "platform_system == \"Linux\""} -nvidia-cuda-runtime-cu11 = {version = "11.7.99", markers = "platform_system == \"Linux\""} -nvidia-cudnn-cu11 = {version = "8.5.0.96", markers = "platform_system == \"Linux\""} -typing-extensions = "*" - -[package.extras] -opt-einsum = ["opt-einsum (>=3.3)"] - -[[package]] -name = "tqdm" -version = "4.64.1" -description = "Fast, Extensible Progress Meter" -category = "main" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" - -[package.dependencies] -colorama = {version = "*", markers = "platform_system == \"Windows\""} - -[package.extras] -dev = ["py-make (>=0.1.0)", "twine", "wheel"] -notebook = ["ipywidgets (>=6)"] -slack = ["slack-sdk"] -telegram = ["requests"] - -[[package]] -name = "traitlets" -version = "5.8.0" -description = "Traitlets Python configuration system" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.extras] -docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"] -test = ["argcomplete (>=2.0)", "pre-commit", "pytest", "pytest-mock"] - -[[package]] -name = "typing-extensions" -version = "4.4.0" -description = "Backported and Experimental Type Hints for Python 3.7+" -category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "wcwidth" -version = "0.2.5" -description = "Measures the displayed width of unicode strings in a terminal" -category = "main" -optional = false -python-versions = "*" - -[metadata] -lock-version = "1.1" -python-versions = ">=3.9,<3.11" -content-hash = "de813fd55bcec86ac2ea53f5f60b9f804a293af5c56de030b703aef0bf36614f" - -[metadata.files] -appnope = [] -asttokens = [] -attrs = [] -backcall = [ - {file = "backcall-0.2.0-py2.py3-none-any.whl", hash = "sha256:fbbce6a29f263178a1f7915c1940bde0ec2b2a967566fe1c65c1dfb7422bd255"}, - {file = "backcall-0.2.0.tar.gz", hash = "sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e"}, -] -colorama = [] -contourpy = [] -cycler = [ - {file = "cycler-0.11.0-py3-none-any.whl", hash = "sha256:3a27e95f763a428a739d2add979fa7494c912a32c17c4c38c4d5f082cad165a3"}, - {file = "cycler-0.11.0.tar.gz", hash = "sha256:9c87405839a19696e837b3b818fed3f5f69f16f1eec1a1ad77e043dcea9c772f"}, -] -decorator = [ - {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, - {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, -] -executing = [] -fonttools = [] -future = [] -gmr = [ - {file = "gmr-1.6.2.tar.gz", hash = "sha256:953e3f350ac94557612a1832cba0c319389d4f857fe0cf8cd51a1706c3935e6d"}, -] -iniconfig = [] -ipython = [] -jedi = [] -jinja2 = [] -joblib = [] -jsonpickle = [] 
-kiwisolver = [] -markupsafe = [ - {file = "MarkupSafe-2.1.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:86b1f75c4e7c2ac2ccdaec2b9022845dbb81880ca318bb7a0a01fbf7813e3812"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f121a1420d4e173a5d96e47e9a0c0dcff965afdf1626d28de1460815f7c4ee7a"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a49907dd8420c5685cfa064a1335b6754b74541bbb3706c259c02ed65b644b3e"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10c1bfff05d95783da83491be968e8fe789263689c02724e0c691933c52994f5"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b7bd98b796e2b6553da7225aeb61f447f80a1ca64f41d83612e6139ca5213aa4"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b09bf97215625a311f669476f44b8b318b075847b49316d3e28c08e41a7a573f"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:694deca8d702d5db21ec83983ce0bb4b26a578e71fbdbd4fdcd387daa90e4d5e"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:efc1913fd2ca4f334418481c7e595c00aad186563bbc1ec76067848c7ca0a933"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-win32.whl", hash = "sha256:4a33dea2b688b3190ee12bd7cfa29d39c9ed176bda40bfa11099a3ce5d3a7ac6"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:dda30ba7e87fbbb7eab1ec9f58678558fd9a6b8b853530e176eabd064da81417"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:671cd1187ed5e62818414afe79ed29da836dde67166a9fac6d435873c44fdd02"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3799351e2336dc91ea70b034983ee71cf2f9533cdff7c14c90ea126bfd95d65a"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e72591e9ecd94d7feb70c1cbd7be7b3ebea3f548870aa91e2732960fa4d57a37"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6fbf47b5d3728c6aea2abb0589b5d30459e369baa772e0f37a0320185e87c980"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d5ee4f386140395a2c818d149221149c54849dfcfcb9f1debfe07a8b8bd63f9a"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:bcb3ed405ed3222f9904899563d6fc492ff75cce56cba05e32eff40e6acbeaa3"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:e1c0b87e09fa55a220f058d1d49d3fb8df88fbfab58558f1198e08c1e1de842a"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-win32.whl", hash = "sha256:8dc1c72a69aa7e082593c4a203dcf94ddb74bb5c8a731e4e1eb68d031e8498ff"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-win_amd64.whl", hash = "sha256:97a68e6ada378df82bc9f16b800ab77cbf4b2fada0081794318520138c088e4a"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:e8c843bbcda3a2f1e3c2ab25913c80a3c5376cd00c6e8c4a86a89a28c8dc5452"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0212a68688482dc52b2d45013df70d169f542b7394fc744c02a57374a4207003"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e576a51ad59e4bfaac456023a78f6b5e6e7651dcd383bcc3e18d06f9b55d6d1"}, - {file = 
"MarkupSafe-2.1.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b9fe39a2ccc108a4accc2676e77da025ce383c108593d65cc909add5c3bd601"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:96e37a3dc86e80bf81758c152fe66dbf60ed5eca3d26305edf01892257049925"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6d0072fea50feec76a4c418096652f2c3238eaa014b2f94aeb1d56a66b41403f"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:089cf3dbf0cd6c100f02945abeb18484bd1ee57a079aefd52cffd17fba910b88"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6a074d34ee7a5ce3effbc526b7083ec9731bb3cbf921bbe1d3005d4d2bdb3a63"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-win32.whl", hash = "sha256:421be9fbf0ffe9ffd7a378aafebbf6f4602d564d34be190fc19a193232fd12b1"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:fc7b548b17d238737688817ab67deebb30e8073c95749d55538ed473130ec0c7"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:e04e26803c9c3851c931eac40c695602c6295b8d432cbe78609649ad9bd2da8a"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b87db4360013327109564f0e591bd2a3b318547bcef31b468a92ee504d07ae4f"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:99a2a507ed3ac881b975a2976d59f38c19386d128e7a9a18b7df6fff1fd4c1d6"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56442863ed2b06d19c37f94d999035e15ee982988920e12a5b4ba29b62ad1f77"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3ce11ee3f23f79dbd06fb3d63e2f6af7b12db1d46932fe7bd8afa259a5996603"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:33b74d289bd2f5e527beadcaa3f401e0df0a89927c1559c8566c066fa4248ab7"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:43093fb83d8343aac0b1baa75516da6092f58f41200907ef92448ecab8825135"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8e3dcf21f367459434c18e71b2a9532d96547aef8a871872a5bd69a715c15f96"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-win32.whl", hash = "sha256:d4306c36ca495956b6d568d276ac11fdd9c30a36f1b6eb928070dc5360b22e1c"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:46d00d6cfecdde84d40e572d63735ef81423ad31184100411e6e3388d405e247"}, - {file = "MarkupSafe-2.1.1.tar.gz", hash = "sha256:7f91197cc9e48f989d12e4e6fbc46495c446636dfc81b9ccf50bb0ec74b91d4b"}, -] -matplotlib = [] -matplotlib-inline = [] -missingno = [ - {file = "missingno-0.5.1-py3-none-any.whl", hash = "sha256:74e8fa1ea68c9482479a9429009856ae6cc64725f085092429950e30a8d78f55"}, - {file = "missingno-0.5.1.tar.gz", hash = "sha256:22e1735a9213df7425e76123ebcc627d11a2608a1d725b90c6a2d7329db718db"}, -] -networkx = [] -numpy = [] -nvidia-cublas-cu11 = [] -nvidia-cuda-nvrtc-cu11 = [] -nvidia-cuda-runtime-cu11 = [] -nvidia-cudnn-cu11 = [] -opt-einsum = [] -packaging = [] -pandas = [] -parso = [ - {file = "parso-0.8.3-py2.py3-none-any.whl", hash = "sha256:c001d4636cd3aecdaf33cbb40aebb59b094be2a74c556778ef5576c175e19e75"}, - {file = "parso-0.8.3.tar.gz", hash = "sha256:8c07be290bb59f03588915921e29e8a50002acaf2cdc5fa0e0114f91709fafa0"}, -] -patsy = [] -pexpect = [ - {file = 
"pexpect-4.8.0-py2.py3-none-any.whl", hash = "sha256:0b48a55dcb3c05f3329815901ea4fc1537514d6ba867a152b581d69ae3710937"}, - {file = "pexpect-4.8.0.tar.gz", hash = "sha256:fc65a43959d153d0114afe13997d439c22823a27cefceb5ff35c2178c6784c0c"}, -] -pgmpy = [] -pickleshare = [ - {file = "pickleshare-0.7.5-py2.py3-none-any.whl", hash = "sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56"}, - {file = "pickleshare-0.7.5.tar.gz", hash = "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca"}, -] -pillow = [] -pluggy = [] -pomegranate = [ - {file = "pomegranate-0.14.8-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:d5f1d83eb44d924640fbe952c38e904d8aa97882ffc85fb16e120a40e0463e3c"}, - {file = "pomegranate-0.14.8-cp37-cp37m-win32.whl", hash = "sha256:741bc1a5c2fd483b4713a58feac81ac6a5bf4354b326bfe5e320d193a08fbfc2"}, - {file = "pomegranate-0.14.8-cp37-cp37m-win_amd64.whl", hash = "sha256:967e06272526accf54418cd3dbce96cdf62083690142a5f124cba05256663939"}, - {file = "pomegranate-0.14.8-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6808b8bc990e6755d8160ee660472b463ece4f2662dea17dd2184a426903027d"}, - {file = "pomegranate-0.14.8-cp38-cp38-win32.whl", hash = "sha256:61911a91dfb3e7158329707890c844da71919c0dc9b5974cc5941781021c0ee3"}, - {file = "pomegranate-0.14.8-cp38-cp38-win_amd64.whl", hash = "sha256:22038290a34d98c19bf13f1a89df3fe5f1799aa4ba5ac5edd6efc1fbf68ae564"}, - {file = "pomegranate-0.14.8-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a1932f8e91bfb9bc3ae710e3e73c88daed161d25e969a5531e67918db4bb6b99"}, - {file = "pomegranate-0.14.8-cp39-cp39-win32.whl", hash = "sha256:f0f5d9ba93e4ac542a2a260957ff6e1d4e52e714c3993a7af5cf3a0e42e14253"}, - {file = "pomegranate-0.14.8-cp39-cp39-win_amd64.whl", hash = "sha256:8774b66c7882d15a64b47d91cdb333a6c6711f4d96dfa83bf285a9bcf5c0cd97"}, - {file = "pomegranate-0.14.8.tar.gz", hash = "sha256:2296651290482dd53204ffaaaea267ceee057ce1b3ef1f9d9793febe66d6693d"}, -] -prompt-toolkit = [] -ptyprocess = [ - {file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"}, - {file = "ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220"}, -] -pure-eval = [ - {file = "pure_eval-0.2.2-py3-none-any.whl", hash = "sha256:01eaab343580944bc56080ebe0a674b39ec44a945e6d09ba7db3cb8cec289350"}, - {file = "pure_eval-0.2.2.tar.gz", hash = "sha256:2b45320af6dfaa1750f543d714b6d1c520a1688dec6fd24d339063ce0aaa9ac3"}, -] -py = [] -pygments = [] -pyitlib = [] -pyparsing = [] -pytest = [] -python-dateutil = [ - {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, - {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, -] -pytz = [] -pyvis = [] -pyyaml = [ - {file = "PyYAML-6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53"}, - {file = "PyYAML-6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c"}, - {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc"}, - {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b"}, - {file = 
"PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"}, - {file = "PyYAML-6.0-cp310-cp310-win32.whl", hash = "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513"}, - {file = "PyYAML-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a"}, - {file = "PyYAML-6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86"}, - {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f"}, - {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92"}, - {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4"}, - {file = "PyYAML-6.0-cp36-cp36m-win32.whl", hash = "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293"}, - {file = "PyYAML-6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57"}, - {file = "PyYAML-6.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c"}, - {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0"}, - {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4"}, - {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9"}, - {file = "PyYAML-6.0-cp37-cp37m-win32.whl", hash = "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737"}, - {file = "PyYAML-6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d"}, - {file = "PyYAML-6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b"}, - {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba"}, - {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34"}, - {file = "PyYAML-6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287"}, - {file = "PyYAML-6.0-cp38-cp38-win32.whl", hash = "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78"}, - {file = "PyYAML-6.0-cp38-cp38-win_amd64.whl", hash = "sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07"}, - {file = "PyYAML-6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b"}, - {file = "PyYAML-6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174"}, - {file = 
"PyYAML-6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803"}, - {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3"}, - {file = "PyYAML-6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0"}, - {file = "PyYAML-6.0-cp39-cp39-win32.whl", hash = "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb"}, - {file = "PyYAML-6.0-cp39-cp39-win_amd64.whl", hash = "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c"}, - {file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"}, -] -scikit-learn = [] -scipy = [] -seaborn = [] -setuptools-scm = [] -six = [ - {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, - {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, -] -stack-data = [] -statsmodels = [ - {file = "statsmodels-0.13.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3e7ca5b7e678c0bb7a24f5c735d58ac104a50eb61b17c484cce0e221a095560f"}, - {file = "statsmodels-0.13.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:066a75d5585378b2df972f81a90b9a3da5e567b7d4833300c1597438c1a35e29"}, - {file = "statsmodels-0.13.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f15f38dfc9c5c091662cb619e12322047368c67aef449c7554d9b324a15f7a94"}, - {file = "statsmodels-0.13.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c4ccc6b4744613367e8a233bd952c8a838db8f528f9fe033bda25aa13fc7d08"}, - {file = "statsmodels-0.13.2-cp310-cp310-win_amd64.whl", hash = "sha256:855b1cc2a91ab140b9bcf304b1731705805ce73223bf500b988804968554c0ed"}, - {file = "statsmodels-0.13.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b69c9af7606325095f7c40c581957bad9f28775653d41537c1ec4cd1b185ff5b"}, - {file = "statsmodels-0.13.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ab31bac0f72b83bca1f217a12ec6f309a56485a50c4a705fbdd63112213d4da4"}, - {file = "statsmodels-0.13.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d680b910b57fc0aa87472662cdfe09aae0e21db4bdf19ccd6420fd4dffda892"}, - {file = "statsmodels-0.13.2-cp37-cp37m-win32.whl", hash = "sha256:9e9a3f661d372431850d55157d049e079493c97fc06f550d23d8c8c70805cc48"}, - {file = "statsmodels-0.13.2-cp37-cp37m-win_amd64.whl", hash = "sha256:c9f6326870c095ef688f072cd476b932aff0906d60193eaa08e93ec23b29ca83"}, - {file = "statsmodels-0.13.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5bc050f25f1ba1221efef9ea01b751c60935ad787fcd4259f4ece986f2da9141"}, - {file = "statsmodels-0.13.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:426b1c8ea3918d3d27dbfa38f2bee36cabf41d32163e2cbb3adfb0178b24626a"}, - {file = "statsmodels-0.13.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45b80fac4a63308b1e93fa9dc27a8598930fd5dfd77c850ca077bb850254c6d7"}, - {file = "statsmodels-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78ee69ec0e0f79f627245c65f8a495b8581c2ea19084aac63941815feb15dcf3"}, - {file = "statsmodels-0.13.2-cp38-cp38-win32.whl", hash = 
"sha256:20483cc30e11aa072b30d307bb80470f86a23ae8fffa51439ca54509d7aa9b05"}, - {file = "statsmodels-0.13.2-cp38-cp38-win_amd64.whl", hash = "sha256:bf43051a92231ccb9de95e4b6d22d3b15e499ee5ee9bff0a20e6b6ad293e34cb"}, - {file = "statsmodels-0.13.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6bf0dfed5f5edb59b5922b295392cd276463b10a5e730f7e57ee4ff2d8e9a87e"}, - {file = "statsmodels-0.13.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a403b559c5586dab7ac0fc9e754c737b017c96cce0ddd66ff9094764cdaf293d"}, - {file = "statsmodels-0.13.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9f23554dd025ea354ce072ba32bfaa840d2b856372e5734290e181d27a1f9e0c"}, - {file = "statsmodels-0.13.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:815f4df713e3eb6f40ae175c71f2a70d32f9219b5b4d23d4e0faab1171ba93ba"}, - {file = "statsmodels-0.13.2-cp39-cp39-win32.whl", hash = "sha256:461c82ab2265fa8457b96afc23ef3ca19f42eb070436e0241b57e58a38863901"}, - {file = "statsmodels-0.13.2-cp39-cp39-win_amd64.whl", hash = "sha256:39daab5a8a9332c8ea83d6464d065080c9ba65f236daf6a64aa18f64ef776fad"}, - {file = "statsmodels-0.13.2.tar.gz", hash = "sha256:77dc292c9939c036a476f1770f9d08976b05437daa229928da73231147cde7d4"}, -] -threadpoolctl = [ - {file = "threadpoolctl-3.1.0-py3-none-any.whl", hash = "sha256:8b99adda265feb6773280df41eece7b2e6561b772d21ffd52e372f999024907b"}, - {file = "threadpoolctl-3.1.0.tar.gz", hash = "sha256:a335baacfaa4400ae1f0d8e3a58d6674d2f8828e3716bb2802c44955ad391380"}, -] -tomli = [ - {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, - {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, -] -torch = [] -tqdm = [] -traitlets = [] -typing-extensions = [] -wcwidth = [ - {file = "wcwidth-0.2.5-py2.py3-none-any.whl", hash = "sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784"}, - {file = "wcwidth-0.2.5.tar.gz", hash = "sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83"}, -] diff --git a/tests/test_Integrational.py b/tests/test_Integrational.py new file mode 100644 index 0000000..013da0c --- /dev/null +++ b/tests/test_Integrational.py @@ -0,0 +1,277 @@ +import pytest +import itertools +import bamt.networks as networks +import bamt.preprocessors as pp +from pgmpy.estimators import K2Score +import pandas as pd +import numpy as np +from pandas.testing import assert_frame_equal +from sklearn import preprocessing +from sklearn.model_selection import train_test_split + + +class Builder: + def __init__(self): + self.data_paths = { + "Continuous": "data/benchmark/auto_price.csv", + "Discrete": "tests/hack_discrete/hack_data.csv", + "Hybrid": "data/benchmark/new_thyroid.csv", + } + + self.tail = { + "Continuous": ["Continuous", "target"], + "Discrete": ["Discrete", "Tectonic regime"], + "Hybrid": ["Hybrid", "target"], + } + + self.scoring = [("K2", K2Score), "BIC", "MI"] + self.optimizer = ["HC"] + self.use_mixture = [False, True] + self.has_logit = [False, True] + + self.static = {} + + self.dynamic = { + "Continuous": [self.use_mixture, [False], self.optimizer, self.scoring], + "Discrete": [[False], [False], self.optimizer, self.scoring], + "Hybrid": [self.use_mixture, self.has_logit, self.optimizer, self.scoring], + } + + def create_from_config(self): + """Method to collect data from config""" + self.static = dict( + Discrete=[self.data_paths["Discrete"], *self.tail["Discrete"]], + 
Continuous=[self.data_paths["Continuous"], *self.tail["Continuous"]],
+            Hybrid=[self.data_paths["Hybrid"], *self.tail["Hybrid"]],
+            evo=[False, False, "Evo", self.scoring[0]],
+        )
+
+    def create_evo_item(self, net_type):
+        evo_item = self.static["evo"][:]
+        evo_item.insert(0, self.data_paths[net_type])
+        evo_item.extend(self.tail[net_type])
+        return evo_item
+
+    @staticmethod
+    def insert_list(loc, what, to):
+        new = to[:]
+        new[loc:loc] = what
+        return new
+
+    def create_net_items(self, net_type):
+        static = self.static[net_type][:]
+        dynamic_part = map(list, itertools.product(*self.dynamic[net_type]))
+        return list(map(lambda x: self.insert_list(1, x, static), dynamic_part))
+
+    def get_params(self):
+        self.create_from_config()
+        params = []
+        for net_type in ["Discrete", "Continuous", "Hybrid"]:
+            params.extend(
+                self.create_net_items(net_type) + [self.create_evo_item(net_type)]
+            )
+        return params
+
+
+params = Builder().get_params()
+
+
+def initialize_bn(bn_type, use_mixture, has_logit):
+    if bn_type == "Discrete":
+        bn = networks.DiscreteBN()
+    elif bn_type == "Continuous":
+        bn = networks.ContinuousBN(use_mixture=use_mixture)
+    elif bn_type == "Hybrid":
+        bn = networks.HybridBN(has_logit=has_logit, use_mixture=use_mixture)
+    elif bn_type == "Composite":
+        bn = networks.CompositeBN()
+    else:
+        raise ValueError(f"Unknown network type: {bn_type}")
+    return bn
+
+
+def prepare_data(directory):
+    data = pd.read_csv(directory, index_col=0)
+    train, test = train_test_split(data, test_size=0.33, random_state=42)
+
+    encoder = preprocessing.LabelEncoder()
+    discretizer = preprocessing.KBinsDiscretizer(
+        n_bins=5, encode="ordinal", strategy="quantile"
+    )
+
+    p = pp.Preprocessor([("encoder", encoder), ("discretizer", discretizer)])
+    discretized_data, est = p.apply(train)
+    info = p.info
+    return info, discretized_data, train, test
+
+
+class TestNetwork:
+    # Check that a trained network and its saved-then-loaded copy give equal predictions
+    @pytest.mark.parametrize(
+        "directory, use_mixture, has_logit, optimizer, scoring, bn_type, target", params
+    )
+    def test_1(
+        self, directory, use_mixture, has_logit, optimizer, scoring, bn_type, target
+    ):
+        test_id = "test_1"
+
+        bn = initialize_bn(bn_type, use_mixture, has_logit)
+        info, discretized_data, train, test = prepare_data(directory)
+        bn.add_nodes(info)
+        if bn_type != "Composite":
+            bn.add_edges(
+                discretized_data,
+                optimizer=optimizer,
+                scoring_function=scoring,
+                progress_bar=False,
+            )
+        else:
+            bn.add_edges(train)
+
+        bn.fit_parameters(train)
+        predict = bn.predict(
+            test[[x for x in test.columns if x != target]], progress_bar=False
+        )
+        bn.save("bn")
+
+        bn = initialize_bn(bn_type, use_mixture, has_logit)
+        bn.load("bn.json")
+        predict_loaded = bn.predict(
+            test[[x for x in test.columns if x != target]], progress_bar=False
+        )
+
+        try:
+            assert_frame_equal(pd.DataFrame(predict), pd.DataFrame(predict_loaded))
+            print(f"{test_id} ran successfully")
+        except AssertionError:
+            print(
+                f"params: {dict(zip(['use_mixture', 'has_logit', 'optimizer', 'scoring', 'bn_type'], [use_mixture, has_logit, optimizer, scoring, bn_type]))}"
+            )
+            raise
+
+    # Check that a trained network's predictions are unchanged by saving
+    @pytest.mark.parametrize(
+        "directory, use_mixture, has_logit, optimizer, scoring, bn_type, target", params
+    )
+    def test_2(
+        self, directory, use_mixture, has_logit, optimizer, scoring, bn_type, target
+    ):
+        test_id = "test_2"
+
+        bn = initialize_bn(bn_type, use_mixture, has_logit)
+        info, discretized_data, train, test = prepare_data(directory)
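+        # Same setup as test_1; the difference is that predictions are compared
+        # on the same in-memory network before and after bn.save(), rather than
+        # against a reloaded copy.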
+        bn.add_nodes(info)
+        if bn_type != "Composite":
+            bn.add_edges(
+                discretized_data,
+                optimizer=optimizer,
+                scoring_function=scoring,
+                progress_bar=False,
+            )
+        else:
+            bn.add_edges(train)
+
+        bn.fit_parameters(train)
+        predict = bn.predict(
+            test[[x for x in test.columns if x != target]], progress_bar=False
+        )
+        bn.save("bn")
+
+        predict2 = bn.predict(
+            test[[x for x in test.columns if x != target]], progress_bar=False
+        )
+
+        try:
+            assert_frame_equal(pd.DataFrame(predict), pd.DataFrame(predict2))
+            print(f"{test_id} ran successfully")
+        except AssertionError:
+            print(
+                f"params: {dict(zip(['use_mixture', 'has_logit', 'optimizer', 'scoring', 'bn_type'], [use_mixture, has_logit, optimizer, scoring, bn_type]))}"
+            )
+            raise
+
+    # Checking network predictions without edges
+    @pytest.mark.parametrize(
+        "directory, use_mixture, has_logit, optimizer, scoring, bn_type, target", params
+    )
+    def test_3(
+        self, directory, use_mixture, has_logit, optimizer, scoring, bn_type, target
+    ):
+        test_id = "test_3"
+
+        bn = initialize_bn(bn_type, use_mixture, has_logit)
+        info, discretized_data, train, test = prepare_data(directory)
+        bn.add_nodes(info)
+        bn.fit_parameters(train)
+
+        predict = bn.predict(
+            test[[x for x in test.columns if x != target]], progress_bar=False
+        )
+
+        try:
+            if info["types"][target] == "cont":
+                if use_mixture:
+                    mean = bn.distributions[target]["mean"]
+                    w = bn.distributions[target]["coef"]
+                    sample = 0
+                    for ind, wi in enumerate(w):
+                        sample += wi * mean[ind][0]
+                else:
+                    sample = train[target].mean()
+
+                assert np.all(np.array(predict[target]) == sample)
+
+            elif info["types"][target] == "disc_num":
+                most_frequent = train[target].value_counts().index[0]
+                assert np.all(np.array(predict[target]) == most_frequent)
+
+            print(f"{test_id} ran successfully")
+        except AssertionError:
+            print(
+                f"params: {dict(zip(['use_mixture', 'has_logit', 'optimizer', 'scoring', 'bn_type'], [use_mixture, has_logit, optimizer, scoring, bn_type]))}"
+            )
+            raise
+
+    # Checking the network trained on a single sample
+    @pytest.mark.parametrize(
+        "directory, use_mixture, has_logit, optimizer, scoring, bn_type, target", params
+    )
+    def test_4(
+        self, directory, use_mixture, has_logit, optimizer, scoring, bn_type, target
+    ):
+        test_id = "test_4"
+
+        if not use_mixture:
+
+            bn = initialize_bn(bn_type, use_mixture, has_logit)
+            info, discretized_data, train, test = prepare_data(directory)
+
+            bn.add_nodes(info)
+
+            train_data_1 = pd.DataFrame(train.iloc[0].to_dict(), index=[0])
+            disc_data_1 = pd.DataFrame(discretized_data.iloc[0].to_dict(), index=[0])
+
+            if bn_type != "Composite":
+                bn.add_edges(
+                    disc_data_1,
+                    optimizer=optimizer,
+                    scoring_function=scoring,
+                    progress_bar=False,
+                )
+            else:
+                bn.add_edges(train_data_1)
+
+            bn.fit_parameters(train_data_1)
+
+            predict = bn.predict(
+                test[[x for x in test.columns if x != target]], progress_bar=False
+            )
+
+            try:
+                assert np.all(np.array(predict[target]) == train_data_1[target][0])
+                print(f"{test_id} ran successfully")
+            except AssertionError:
+                print(
+                    f"params: {dict(zip(['use_mixture', 'has_logit', 'optimizer', 'scoring', 'bn_type'], [use_mixture, has_logit, optimizer, scoring, bn_type]))}"
+                )
+                raise
+        else:
+            pass
From af60e33873847eb8439861054798f0a74ca3b57f Mon Sep 17 00:00:00 2001
From: jrzkaminski
Date: Fri, 5 Jul 2024 16:00:23 +0300
Subject: [PATCH 05/15] deleted old scripts

---
 bamt/networks/base.py       |   0
 tests/test_Integrational.py | 277 ------------------------------
 2 files changed, 277 deletions(-)
 delete mode
100644 bamt/networks/base.py delete mode 100644 tests/test_Integrational.py diff --git a/bamt/networks/base.py b/bamt/networks/base.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/test_Integrational.py b/tests/test_Integrational.py deleted file mode 100644 index 013da0c..0000000 --- a/tests/test_Integrational.py +++ /dev/null @@ -1,277 +0,0 @@ -import pytest -import itertools -import bamt.networks as networks -import bamt.preprocessors as pp -from pgmpy.estimators import K2Score -import pandas as pd -import numpy as np -from pandas.testing import assert_frame_equal -from sklearn import preprocessing -from sklearn.model_selection import train_test_split - - -class Builder: - def __init__(self): - self.data_paths = { - "Continuous": "data/benchmark/auto_price.csv", - "Discrete": "tests/hack_discrete/hack_data.csv", - "Hybrid": "data/benchmark/new_thyroid.csv", - } - - self.tail = { - "Continuous": ["Continuous", "target"], - "Discrete": ["Discrete", "Tectonic regime"], - "Hybrid": ["Hybrid", "target"], - } - - self.scoring = [("K2", K2Score), "BIC", "MI"] - self.optimizer = ["HC"] - self.use_mixture = [False, True] - self.has_logit = [False, True] - - self.static = {} - - self.dynamic = { - "Continuous": [self.use_mixture, [False], self.optimizer, self.scoring], - "Discrete": [[False], [False], self.optimizer, self.scoring], - "Hybrid": [self.use_mixture, self.has_logit, self.optimizer, self.scoring], - } - - def create_from_config(self): - """Method to collect data from config""" - self.static = dict( - Discrete=[self.data_paths["Discrete"], *self.tail["Discrete"]], - Continuous=[self.data_paths["Continuous"], *self.tail["Continuous"]], - Hybrid=[self.data_paths["Hybrid"], *self.tail["Hybrid"]], - evo=[False, False, "Evo", self.scoring[0]], - ) - - def create_evo_item(self, net_type): - evo_item = self.static["evo"][:] - evo_item.insert(0, self.data_paths[net_type]) - evo_item.extend(self.tail[net_type]) - return evo_item - - @staticmethod - def insert_list(loc, what, to): - new = to[:] - new[loc:loc] = what - return new - - def create_net_items(self, net_type): - static = self.static[net_type][:] - dynamic_part = map(list, itertools.product(*self.dynamic[net_type])) - return list(map(lambda x: self.insert_list(1, x, static), dynamic_part)) - - def get_params(self): - self.create_from_config() - params = [] - for net_type in ["Discrete", "Continuous", "Hybrid"]: - params.extend( - self.create_net_items(net_type) + [self.create_evo_item(net_type)] - ) - return params - - -params = Builder().get_params() - - -def initialize_bn(bn_type, use_mixture, has_logit): - if bn_type == "Discrete": - bn = networks.DiscreteBN() - elif bn_type == "Continuous": - bn = networks.ContinuousBN(use_mixture=use_mixture) - elif bn_type == "Hybrid": - bn = networks.HybridBN(has_logit=has_logit, use_mixture=use_mixture) - elif bn_type == "Composite": - bn = networks.CompositeBN() - return bn - - -def prepare_data(directory): - data = pd.read_csv(directory, index_col=0) - train, test = train_test_split(data, test_size=0.33, random_state=42) - - encoder = preprocessing.LabelEncoder() - discretizer = preprocessing.KBinsDiscretizer( - n_bins=5, encode="ordinal", strategy="quantile" - ) - - p = pp.Preprocessor([("encoder", encoder), ("discretizer", discretizer)]) - discretized_data, est = p.apply(train) - info = p.info - return info, discretized_data, train, test - - -class TestNetwork: - # Checking the equality of predictions (trained and loaded network) before and after saving - 
@pytest.mark.parametrize(
-        "directory, use_mixture, has_logit, optimizer, scoring, bn_type, target", params
-    )
-    def test_1(
-        self, directory, use_mixture, has_logit, optimizer, scoring, bn_type, target
-    ):
-        test_id = "test_1"
-
-        bn = initialize_bn(bn_type, use_mixture, has_logit)
-        info, discretized_data, train, test = prepare_data(directory)
-        bn.add_nodes(info)
-        if bn_type != "Composite":
-            bn.add_edges(
-                discretized_data,
-                optimizer=optimizer,
-                scoring_function=scoring,
-                progress_bar=False,
-            )
-        else:
-            bn.add_edges(train)
-
-        bn.fit_parameters(train)
-        predict = bn.predict(
-            test[[x for x in test.columns if x != target]], progress_bar=False
-        )
-        bn.save("bn")
-
-        bn = initialize_bn(bn_type, use_mixture, has_logit)
-        bn.load("bn.json")
-        predict_loaded = bn.predict(
-            test[[x for x in test.columns if x != target]], progress_bar=False
-        )
-
-        try:
-            assert_frame_equal(pd.DataFrame(predict), pd.DataFrame(predict_loaded))
-            print(f"{test_id} ran successfully")
-        except AssertionError:
-            print(
-                f"params: {dict(zip(['use_mixture', 'has_logit', 'optimizer', 'scoring', 'bn_type'], [use_mixture, has_logit, optimizer, scoring, bn_type]))}"
-            )
-            raise
-
-    # Checking the prediction algorithm (trained network) before and after saving
-    @pytest.mark.parametrize(
-        "directory, use_mixture, has_logit, optimizer, scoring, bn_type, target", params
-    )
-    def test_2(
-        self, directory, use_mixture, has_logit, optimizer, scoring, bn_type, target
-    ):
-        test_id = "test_2"
-
-        bn = initialize_bn(bn_type, use_mixture, has_logit)
-        info, discretized_data, train, test = prepare_data(directory)
-        bn.add_nodes(info)
-        if bn_type != "Composite":
-            bn.add_edges(
-                discretized_data,
-                optimizer=optimizer,
-                scoring_function=scoring,
-                progress_bar=False,
-            )
-        else:
-            bn.add_edges(train)
-
-        bn.fit_parameters(train)
-        predict = bn.predict(
-            test[[x for x in test.columns if x != target]], progress_bar=False
-        )
-        bn.save("bn")
-
-        predict2 = bn.predict(
-            test[[x for x in test.columns if x != target]], progress_bar=False
-        )
-
-        try:
-            assert_frame_equal(pd.DataFrame(predict), pd.DataFrame(predict2))
-            print(f"{test_id} ran successfully")
-        except AssertionError:
-            print(
-                f"params: {dict(zip(['use_mixture', 'has_logit', 'optimizer', 'scoring', 'bn_type'], [use_mixture, has_logit, optimizer, scoring, bn_type]))}"
-            )
-            raise
-
-    # Checking network predictions without edges
-    @pytest.mark.parametrize(
-        "directory, use_mixture, has_logit, optimizer, scoring, bn_type, target", params
-    )
-    def test_3(
-        self, directory, use_mixture, has_logit, optimizer, scoring, bn_type, target
-    ):
-        test_id = "test_3"
-
-        bn = initialize_bn(bn_type, use_mixture, has_logit)
-        info, discretized_data, train, test = prepare_data(directory)
-        bn.add_nodes(info)
-        bn.fit_parameters(train)
-
-        predict = bn.predict(
-            test[[x for x in test.columns if x != target]], progress_bar=False
-        )
-
-        try:
-            if info["types"][target] == "cont":
-                if use_mixture:
-                    mean = bn.distributions[target]["mean"]
-                    w = bn.distributions[target]["coef"]
-                    sample = 0
-                    for ind, wi in enumerate(w):
-                        sample += wi * mean[ind][0]
-                else:
-                    sample = train[target].mean()
-
-                assert np.all(np.array(predict[target]) == sample)
-
-            elif info["types"][target] == "disc_num":
-                most_frequent = train[target].value_counts().index[0]
-                assert np.all(np.array(predict[target]) == most_frequent)
-
-            print(f"{test_id} ran successfully")
-        except AssertionError:
-            print(
-                f"params: {dict(zip(['use_mixture', 'has_logit', 'optimizer', 'scoring', 'bn_type'], [use_mixture, has_logit, optimizer, scoring, bn_type]))}"
-            )
-            raise
-
-    # Checking the network trained on a single sample
-    @pytest.mark.parametrize(
-        "directory, use_mixture, has_logit, optimizer, scoring, bn_type, target", params
-    )
-    def test_4(
-        self, directory, use_mixture, has_logit, optimizer, scoring, bn_type, target
-    ):
-        test_id = "test_4"
-
-        if not use_mixture:
-
-            bn = initialize_bn(bn_type, use_mixture, has_logit)
-            info, discretized_data, train, test = prepare_data(directory)
-
-            bn.add_nodes(info)
-
-            train_data_1 = pd.DataFrame(train.iloc[0].to_dict(), index=[0])
-            disc_data_1 = pd.DataFrame(discretized_data.iloc[0].to_dict(), index=[0])
-
-            if bn_type != "Composite":
-                bn.add_edges(
-                    disc_data_1,
-                    optimizer=optimizer,
-                    scoring_function=scoring,
-                    progress_bar=False,
-                )
-            else:
-                bn.add_edges(train_data_1)
-
-            bn.fit_parameters(train_data_1)
-
-            predict = bn.predict(
-                test[[x for x in test.columns if x != target]], progress_bar=False
-            )
-
-            try:
-                assert np.all(np.array(predict[target]) == train_data_1[target][0])
-                print(f"{test_id} ran successfully")
-            except AssertionError:
-                print(
-                    f"params: {dict(zip(['use_mixture', 'has_logit', 'optimizer', 'scoring', 'bn_type'], [use_mixture, has_logit, optimizer, scoring, bn_type]))}"
-                )
-                raise
-        else:
-            pass
From 4f889e196287251cabf7024c1e12ce221cb9e407 Mon Sep 17 00:00:00 2001
From: jrzkaminski
Date: Fri, 5 Jul 2024 16:05:30 +0300
Subject: [PATCH 06/15] update requirements

---
 docs/source/api/builders.rst          | 22 ------------
 docs/source/api/mi_entropy_gauss.rst  |  7 ----
 docs/source/api/networks.rst          | 32 -----------------
 docs/source/api/nodes.rst             | 52 ---------------------------
 docs/source/api/preprocess.rst        | 17 ---------
 docs/source/api/preprocessors.rst     |  7 ----
 docs/source/api/redef_HC.rst          |  7 ----
 docs/source/api/redef_info_scores.rst |  7 ----
 docs/source/api/utils.rst             | 44 -----------------------
 pyproject.toml                        |  2 +-
 requirements.txt                      |  2 +-
 11 files changed, 2 insertions(+), 197 deletions(-)
 delete mode 100644 docs/source/api/builders.rst
 delete mode 100644 docs/source/api/mi_entropy_gauss.rst
 delete mode 100644 docs/source/api/networks.rst
 delete mode 100644 docs/source/api/nodes.rst
 delete mode 100644 docs/source/api/preprocess.rst
 delete mode 100644 docs/source/api/preprocessors.rst
 delete mode 100644 docs/source/api/redef_HC.rst
 delete mode 100644 docs/source/api/redef_info_scores.rst
 delete mode 100644 docs/source/api/utils.rst

diff --git a/docs/source/api/builders.rst b/docs/source/api/builders.rst
deleted file mode 100644
index 1283f7f..0000000
--- a/docs/source/api/builders.rst
+++ /dev/null
@@ -1,22 +0,0 @@
-Builders
-========
-
-.. automodule:: bamt.builders.builders_base
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-.. automodule:: bamt.builders.hc_builder
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-.. automodule:: bamt.builders.evo_builder
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-.. automodule:: bamt.builders.composite_builder
-    :members:
-    :undoc-members:
-    :show-inheritance:
diff --git a/docs/source/api/mi_entropy_gauss.rst b/docs/source/api/mi_entropy_gauss.rst
deleted file mode 100644
index 33d02c0..0000000
--- a/docs/source/api/mi_entropy_gauss.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-Entropy and Mutual Information
-==============================
-
-..
automodule:: bamt.mi_entropy_gauss - :members: - :undoc-members: - :show-inheritance: \ No newline at end of file diff --git a/docs/source/api/networks.rst b/docs/source/api/networks.rst deleted file mode 100644 index d19e796..0000000 --- a/docs/source/api/networks.rst +++ /dev/null @@ -1,32 +0,0 @@ -Networks Module -=============== - -.. automodule:: bamt.networks.base - :members: - :undoc-members: - :show-inheritance: - -.. automodule:: bamt.networks.continuous_bn - :members: - :undoc-members: - :show-inheritance: - -.. automodule:: bamt.networks.discrete_bn - :members: - :undoc-members: - :show-inheritance: - -.. automodule:: bamt.networks.hybrid_bn - :members: - :undoc-members: - :show-inheritance: - -.. automodule:: bamt.networks.composite_bn - :members: - :undoc-members: - :show-inheritance: - -.. automodule:: bamt.networks.big_brave_bn - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/api/nodes.rst b/docs/source/api/nodes.rst deleted file mode 100644 index 9641e94..0000000 --- a/docs/source/api/nodes.rst +++ /dev/null @@ -1,52 +0,0 @@ -Nodes module -============ - -.. automodule:: bamt.nodes.base - :members: - :undoc-members: - :show-inheritance: - -.. automodule:: bamt.nodes.composite_continuous_node - :members: - :undoc-members: - :show-inheritance: - -.. automodule:: bamt.nodes.composite_discrete_node - :members: - :undoc-members: - :show-inheritance: - -.. automodule:: bamt.nodes.conditional_gaussian_node - :members: - :undoc-members: - :show-inheritance: - -.. automodule:: bamt.nodes.conditional_logit_node - :members: - :undoc-members: - :show-inheritance: - -.. automodule:: bamt.nodes.conditional_mixture_gaussian_node - :members: - :undoc-members: - :show-inheritance: - -.. automodule:: bamt.nodes.discrete_node - :members: - :undoc-members: - :show-inheritance: - -.. automodule:: bamt.nodes.logit_node - :members: - :undoc-members: - :show-inheritance: - -.. automodule:: bamt.nodes.mixture_gaussian_node - :members: - :undoc-members: - :show-inheritance: - -.. automodule:: bamt.nodes.schema - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/api/preprocess.rst b/docs/source/api/preprocess.rst deleted file mode 100644 index 944324e..0000000 --- a/docs/source/api/preprocess.rst +++ /dev/null @@ -1,17 +0,0 @@ -Preprocess module -================= - -.. automodule:: bamt.preprocess.discretization - :members: - :undoc-members: - :show-inheritance: - -.. automodule:: bamt.preprocess.graph - :members: - :undoc-members: - :show-inheritance: - -.. automodule:: bamt.preprocess.numpy_pandas - :members: - :undoc-members: - :show-inheritance: \ No newline at end of file diff --git a/docs/source/api/preprocessors.rst b/docs/source/api/preprocessors.rst deleted file mode 100644 index e5985df..0000000 --- a/docs/source/api/preprocessors.rst +++ /dev/null @@ -1,7 +0,0 @@ -Preprocessing utilities -======================= - -.. automodule:: bamt.preprocessors - :members: - :undoc-members: - :show-inheritance: \ No newline at end of file diff --git a/docs/source/api/redef_HC.rst b/docs/source/api/redef_HC.rst deleted file mode 100644 index 78c37c4..0000000 --- a/docs/source/api/redef_HC.rst +++ /dev/null @@ -1,7 +0,0 @@ -Hill Climbing Algorithm -======================= - -.. 
automodule:: bamt.redef_HC - :members: - :undoc-members: - :show-inheritance: \ No newline at end of file diff --git a/docs/source/api/redef_info_scores.rst b/docs/source/api/redef_info_scores.rst deleted file mode 100644 index 7151f48..0000000 --- a/docs/source/api/redef_info_scores.rst +++ /dev/null @@ -1,7 +0,0 @@ -Scores redefiner -================ - -.. automodule:: bamt.redef_info_scores - :members: - :undoc-members: - :show-inheritance: \ No newline at end of file diff --git a/docs/source/api/utils.rst b/docs/source/api/utils.rst deleted file mode 100644 index 3003e0a..0000000 --- a/docs/source/api/utils.rst +++ /dev/null @@ -1,44 +0,0 @@ -Utilities -========= - -Math Utilities --------------- - - .. automodule:: bamt.utils.MathUtils - :members: - :undoc-members: - :show-inheritance: - -Graph Utilities --------------- - - .. automodule:: bamt.utils.GraphUtils - :members: - :undoc-members: - :show-inheritance: - -Evolutionary Utilities ----------------------- - - .. automodule:: bamt.utils.EvoUtils - :members: - :undoc-members: - :show-inheritance: - -Evolutionary Utilities --------------- - - .. automodule:: bamt.utils.composite_utils.CompositeGeneticOperators - :members: - :undoc-members: - :show-inheritance: - - .. automodule:: bamt.utils.composite_utils.CompositeModel - :members: - :undoc-members: - :show-inheritance: - - .. automodule:: bamt.utils.composite_utils.MLUtils - :members: - :undoc-members: - :show-inheritance: \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 8000b61..3c14da0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,7 @@ scikit-learn = "^1.4.2" scipy = "^1.13.0" pyvis = "^0.3.1" missingno = "^0.5.1" -pgmpy = "^0.1.20" +pgmpy = "^0.1.25" thegolem = "^0.3.3" xgboost = ">=1.7.6" catboost = ">=2.0.0" diff --git a/requirements.txt b/requirements.txt index 374a8a4..de00482 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,4 +18,4 @@ pyvis>=0.3.1 # TODO: exclude these libraries gmr==1.6.2 -pgmpy==0.1.20 \ No newline at end of file +pgmpy==0.1.25 \ No newline at end of file From cc3672e235cc19d93f3e7e8a758a29bf998b06c7 Mon Sep 17 00:00:00 2001 From: jrzkaminski Date: Fri, 5 Jul 2024 16:11:40 +0300 Subject: [PATCH 07/15] remove old documentation --- docs/Makefile | 20 --- docs/make.bat | 35 ----- docs/source/bnalgs/applied_tasks.rst | 2 - docs/source/bnalgs/big_bns.rst | 55 -------- docs/source/bnalgs/bn_learning.rst | 77 ----------- docs/source/conf.py | 79 ----------- docs/source/data_section/data_management.rst | 15 --- .../examples/add_ml_models_to_nodes.rst | 102 -------------- .../examples/composite_network_example.rst | 39 ------ docs/source/examples/learn_params_vis.rst | 69 ---------- .../examples/learn_sampling_predict.rst | 89 ------------ docs/source/examples/learn_save.rst | 68 ---------- docs/source/examples/logger_settings.rst | 98 -------------- .../examples/read_structure_and_params.rst | 32 ----- .../read_structure_param_learning.rst | 52 ------- docs/source/getting_started/cite_us.rst | 67 --------- docs/source/getting_started/contribution.rst | 127 ------------------ docs/source/getting_started/faq.rst | 30 ----- docs/source/getting_started/install.rst | 32 ----- docs/source/index.rst | 103 -------------- .../models/bayesiannetworks/base_network.rst | 78 ----------- .../models/bayesiannetworks/composite_bn.rst | 63 --------- .../models/bayesiannetworks/continuous_bn.rst | 90 ------------- .../models/bayesiannetworks/discrete_bn.rst | 85 ------------ .../models/bayesiannetworks/hybrid_bn.rst | 88 
------------ .../models/bayesiannetworks/large_bn_algs.rst | 59 -------- .../bayesiannetworks/models_storing.rst | 4 - .../bayesiannetworks/sampling_predicting.rst | 26 ---- docs/source/tutorials/tutorials_gists.rst | 8 -- 29 files changed, 1692 deletions(-) delete mode 100644 docs/Makefile delete mode 100644 docs/make.bat delete mode 100644 docs/source/bnalgs/applied_tasks.rst delete mode 100644 docs/source/bnalgs/big_bns.rst delete mode 100644 docs/source/bnalgs/bn_learning.rst delete mode 100644 docs/source/conf.py delete mode 100644 docs/source/data_section/data_management.rst delete mode 100644 docs/source/examples/add_ml_models_to_nodes.rst delete mode 100644 docs/source/examples/composite_network_example.rst delete mode 100644 docs/source/examples/learn_params_vis.rst delete mode 100644 docs/source/examples/learn_sampling_predict.rst delete mode 100644 docs/source/examples/learn_save.rst delete mode 100644 docs/source/examples/logger_settings.rst delete mode 100644 docs/source/examples/read_structure_and_params.rst delete mode 100644 docs/source/examples/read_structure_param_learning.rst delete mode 100644 docs/source/getting_started/cite_us.rst delete mode 100644 docs/source/getting_started/contribution.rst delete mode 100644 docs/source/getting_started/faq.rst delete mode 100644 docs/source/getting_started/install.rst delete mode 100644 docs/source/index.rst delete mode 100644 docs/source/models/bayesiannetworks/base_network.rst delete mode 100644 docs/source/models/bayesiannetworks/composite_bn.rst delete mode 100644 docs/source/models/bayesiannetworks/continuous_bn.rst delete mode 100644 docs/source/models/bayesiannetworks/discrete_bn.rst delete mode 100644 docs/source/models/bayesiannetworks/hybrid_bn.rst delete mode 100644 docs/source/models/bayesiannetworks/large_bn_algs.rst delete mode 100644 docs/source/models/bayesiannetworks/models_storing.rst delete mode 100644 docs/source/models/bayesiannetworks/sampling_predicting.rst delete mode 100644 docs/source/tutorials/tutorials_gists.rst diff --git a/docs/Makefile b/docs/Makefile deleted file mode 100644 index d0c3cbf..0000000 --- a/docs/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line, and also -# from the environment for the first two. -SPHINXOPTS ?= -SPHINXBUILD ?= sphinx-build -SOURCEDIR = source -BUILDDIR = build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/make.bat b/docs/make.bat deleted file mode 100644 index dc1312a..0000000 --- a/docs/make.bat +++ /dev/null @@ -1,35 +0,0 @@ -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set SOURCEDIR=source -set BUILDDIR=build - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. 
- echo.If you don't have Sphinx installed, grab it from - echo.https://www.sphinx-doc.org/ - exit /b 1 -) - -if "%1" == "" goto help - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% - -:end -popd diff --git a/docs/source/bnalgs/applied_tasks.rst b/docs/source/bnalgs/applied_tasks.rst deleted file mode 100644 index b5fcbc0..0000000 --- a/docs/source/bnalgs/applied_tasks.rst +++ /dev/null @@ -1,2 +0,0 @@ -Real-world Bayesian Networks Applications -========================================= \ No newline at end of file diff --git a/docs/source/bnalgs/big_bns.rst b/docs/source/bnalgs/big_bns.rst deleted file mode 100644 index 152caa3..0000000 --- a/docs/source/bnalgs/big_bns.rst +++ /dev/null @@ -1,55 +0,0 @@ -Algorithms for Large Bayesian Networks learning -=============================================== - -BigBraveBN ----------- - -The BigBraveBN has been built using BAMT, that was developed by our laboratory team. -As an algorithm for finding the optimal structure of BN, we chose the greedy Hill-Climbing algorithm with the K2 score function, which we have already implemented in the BAMT library. -The idea of this algorithm is quite simple, we start searching for a structure from an empty graph and add, delete or reverse one edge at each iteration, and if the value of the score function improves, we fix this action with an edge. -To limit the search space, we propose our algorithm based on the Brave coefficient. -This coefficient measures the mutual occurrence of variables when clustering is applied to the data set. -The algorithm of Brave coefficient calculation is shown in the following figure. - - -.. image:: ../../images/brave_coefficient_calculation.png - :scale: 60 % - :align: center - - -In the first step, we initialize a matrix that represents n nearest neighbours for every variable (groups). -In the second step for every pair of variables Brave coefficient is calculated using formula. - -.. math:: - Br = \frac{a \times n + (a + c) \times (a + b)}{\sqrt{(a + c) \times (b + d)} + \sqrt{(a + b) \times (c + d)}} - - -In this formula, *a* is the number of groups in which both features fell, -*b* and *c* are the numbers of groups in which one feature fell, but the second did not fall, -*d* is the number of groups in which none of the features fell, *n* - dataset size. - -Here is the workflow of the algorithm on the figure below. - - -.. image:: ../../images/BigBraveBN_workflow.png - :align: center - - -* Initialize data set; -* Choose a proper proximity measure; -* Apply proximity measure to the data set; -* Get N nearest neighbors by proximity for every variable; -* Calculate Brave coefficient of mutual occurrence on nearest neighbors; -* Get Brave coefficient matrix; -* Generate white list of possible edges by setting a threshold; -* Perform structure learning via BAMT tools. - -To form N-nearest neighbours different proximity measures can be used. But some of them have certain limitations. -Pearson correlation coefficient and MI metrics were reviewed for being used in the algorithm. -MI metric was chosen as a default metric due to its versatility and efficiency. - -Here, when we run the Hill-Climbing algorithm, we limit the search space by giving the algorithm a so-called white list. -The white list is a predefined manually or by a certain algorithm list of edges, that limits the solution space by restricting any other connection except itemized in the white list. 
-Thus, the Bayesian Network can only consist of edges that are included in a specific white list. - -For more information about the algorithm, please, refer to the following `paper `__. diff --git a/docs/source/bnalgs/bn_learning.rst b/docs/source/bnalgs/bn_learning.rst deleted file mode 100644 index c5da292..0000000 --- a/docs/source/bnalgs/bn_learning.rst +++ /dev/null @@ -1,77 +0,0 @@ -Bayesian Networks Learning Algorithms -===================================== - -A Bayesian network is a graphical probabilistic model that is a directed acyclic graph in which nodes are features in the data and edges are conditional dependencies between features. - - -.. image:: ../../images/bnla_model.png - :align: center - - -The use of Bayesian networks implies the existence of algorithms for learning the structure and parameters of distributions at the nodes. - -Structure Learning of Bayesian Networks ---------------------------------------- - -Now, the library implements structural learning algorithms that consider the task of learning the BN as an optimization problem: - -.. math:: - V_{opt}, E_{opt}=\underset{G' \subset G_{possible}}{argmax}F(G') - -The following scoring functions are implemented as evaluation functions of the network quality: *K2*, *BIC*, *AIC* and *MI*. -Moreover, the *BIC*, *AIC* and *MI* metrics can be used both on discretized data and on mixed ones -(for more details, see the `publication `__). - -As an optimization algorithm, the greedy Hill Climbing algorithm is used, which iteratively changes the structure and remembers the change that leads to the greatest increase in score: - - - -.. image:: ../../images/HC_scheme_disser.png - :align: center - - -Since greedy algorithms have their drawbacks, we plan to add evolutionary algorithms for structure learning in the future. -Also, our framework allows you to include expert knowledge in the process of structural network learning. -This is done by limiting the search space (white edges and black edges), -it is also possible to set the starting graph from which optimization begins and to restrict or allow the removal of the edges of the starting graph. -All this allows you to flexibly use expert knowledge in the learning process. - -Also in this framework, a variant of structural learning is proposed, when connections from continuous to discrete nodes are allowed (has_logit=true). -The fact is that for a long time Bayesian networks were unable to model conditional distributions in a discrete node with continuous parents. -The solution was then found using classification models, however if you want to limit the appearance of such relationships this can be done with the ‘has_logit’ flag. - - -Parameter Learning of Bayesian Networks ---------------------------------------- - -In addition to structural learning in Bayesian networks, there is learning of distribution parameters at nodes. As a parameter learning algorithm, this framework implements the likelihood maximization algorithm. With the available dataset -*D*, it is necessary to select an estimate for θ parameter that satisfies the condition: - -.. math:: - L(\widehat{\theta}:D)=\underset{\theta\subset\Theta }{max}L(\theta:D) - -By default, distributions at network nodes are modeled using Gaussian distributions and linear regression for continuous nodes and conditional probability tables (CPT) for discrete nodes. - - -.. 
image:: ../../images/params_learning.png - :align: center - - -However, this approach does not model real data very well, in which there is a clear non-Gaussianity and non-linearity. -For such cases, it is proposed, for example, to use mixtures of Gaussian distributions, since with a sufficiently large number of components, the mixture can describe a distribution of any shape (parameter ``use_mixture``). -In the framework for parametric learning, automatic selection of the number of components is implemented. - - -.. image:: ../../images/mixture_edge.png - :align: center - - -A non-parametric approach to the representation of distributions is also implemented, when any machine learning model is used to predict the parameters of the conditional distribution, -so the conditional mathematical expectation can be predicted quite accurately, and the conditional dispersion is a prediction error. -You can choose this method with the help of the ``set_classifier()`` and ``set_regressor()`` methods. -Now the user must choose the model himself, but in the future an algorithm for automatic selection of models will be added. - - -.. image:: ../../images/logit_net.png - :align: center - diff --git a/docs/source/conf.py b/docs/source/conf.py deleted file mode 100644 index 92a1e5f..0000000 --- a/docs/source/conf.py +++ /dev/null @@ -1,79 +0,0 @@ -# Configuration file for the Sphinx documentation builder. -# -# For the full list of built-in configuration values, see the documentation: -# https://www.sphinx-doc.org/en/master/usage/configuration.html - -# -- Path setup -------------------------------------------------------------- - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. 
- -import datetime -import sys -from pathlib import Path - -sys.path.insert(0, str(Path(__file__).parent / "../../")) - -# -- Project information ----------------------------------------------------- -# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information - -project = "BAMT" -copyright = "2023, NSS lab" -author = "NSS lab" -release = "0.1.0" - -# -- General configuration --------------------------------------------------- -# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration - -extensions = [ - "myst_parser", - "sphinx_rtd_theme", - "sphinx.ext.autodoc", - "sphinx.ext.coverage", - "sphinx.ext.napoleon", - "sphinx.ext.viewcode", - "sphinx.ext.mathjax", - "sphinx.ext.autosummary", - "sphinx.ext.autodoc.typehints", - "sphinx.ext.graphviz", - "sphinx.ext.todo", -] - -templates_path = ["_templates"] -exclude_patterns = [] - - -# -- Options for HTML output ------------------------------------------------- -# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output - -html_theme = "sphinx_rtd_theme" -# html_static_path = ['_static'] - -# -- Extension configuration ------------------------------------------------- - -napoleon_google_docstring = True -napoleon_numpy_docstring = False -napoleon_include_init_with_doc = False -napoleon_include_private_with_doc = True -napoleon_include_special_with_doc = True -napoleon_use_admonition_for_examples = True -napoleon_use_admonition_for_notes = True -napoleon_use_admonition_for_references = False -napoleon_use_ivar = True -napoleon_use_keyword = True -napoleon_use_param = True -napoleon_use_rtype = True -napoleon_attr_annotations = False - -autodoc_default_options = { - "members": True, - "undoc-members": False, - "show-inheritance": True, - "member-order": "bysource", - "ignore-module-all": True, -} -autoclass_content = "class" -autodoc_typehints = "signature" -autodoc_typehints_format = "short" -autodoc_mock_imports = ["objgraph", "memory_profiler", "gprof2dot", "snakeviz"] diff --git a/docs/source/data_section/data_management.rst b/docs/source/data_section/data_management.rst deleted file mode 100644 index 87eaa22..0000000 --- a/docs/source/data_section/data_management.rst +++ /dev/null @@ -1,15 +0,0 @@ -Data Management -=============== - -BAMT is designed to hadle three types of data: - -#. ``disc`` - discrete data (e.g. some sort of categorical data), python data types: - ``['str', 'O', 'b', 'categorical', 'object', 'bool']`` - -#. ``disc_num`` - discrete numerical data, python data types: ``['int32', 'int64']`` - -#. ``cont`` - continuous data (e.g. some sort of numerical data), python data types: ``['float32', 'float64]`` - -If, for example, your data set contains a pd.Series of integers, but these integers are actually categories, you should -consider converting them to strings. This is because BAMT will treat them as ``disc_num`` data, which may not be what you want. -The same applies to floats, which will be treated as ``cont`` data. diff --git a/docs/source/examples/add_ml_models_to_nodes.rst b/docs/source/examples/add_ml_models_to_nodes.rst deleted file mode 100644 index 2459f8b..0000000 --- a/docs/source/examples/add_ml_models_to_nodes.rst +++ /dev/null @@ -1,102 +0,0 @@ -Adding Machine Learning models to Bayesian Network nodes -======================================================== - -BAMT supports adding machine learning models to Bayesian Network nodes. - -First, lets import BAMT modules and required machine learning modules. - -.. 
code-block:: python - - import bamt.networks as networks - import bamt.preprocessors as pp - - import pandas as pd - import numpy as np - import matplotlib.pyplot as plt - - from sklearn import preprocessing - from sklearn.ensemble import RandomForestClassifier - from sklearn.neighbors import KNeighborsClassifier - from sklearn.tree import DecisionTreeClassifier - from sklearn.ensemble.RandomForestRegressor import RandomForestRegressor - from sklearn.linear_model.LinearRegression import LinearRegression - - from pgmpy.estimators import K2Score - -Let's start with data importing and preprocessing. - -.. code-block:: python - - # Importing data - data = pd.read_csv(r'../Data/real data/vk_data.csv') - - # Choose columns - cols = ['age', - 'sex', - 'has_pets', - 'is_parent', - 'relation', - 'is_driver', - 'tr_per_month', - 'median_tr', - 'mean_tr'] - data = data[cols] - data[['sex', - 'has_pets', - 'is_parent', - 'relation', - 'is_driver']] = data[['sex', - 'has_pets', - 'is_parent', - 'relation', - 'is_driver']].astype(str) - - # Preprocessing - - encoder = preprocessing.LabelEncoder() - discretizer = preprocessing.KBinsDiscretizer(n_bins=5, encode='ordinal', strategy='quantile') - - p = pp.Preprocessor([('encoder', encoder), ('discretizer', discretizer)]) - discretized_data, est = p.apply(data) - - info = p.info - info - -Next, we initialize Bayesian Network object and add nodes to it. - -.. code-block:: python - - bn = networks.HybridBN(has_logit=True, use_mixture=True) - bn.add_nodes(info) - -After adding nodes we can perform structure learning. - -.. code-block:: python - - bn.add_edges(discretized_data, scoring_function=('K2',K2Score)) - -Finally, before parameters learning, we can add machine learning models to nodes. -Let's add classifier models to discrete nodes and regressor models to continuous nodes and perform parameters learning. - -.. code-block:: python - - bn.set_classifiers(classifiers={'age': DecisionTreeClassifier(), - 'relation': RandomForestClassifier(), - 'is_driver': KNeighborsClassifier(n_neighbors=2)}) - bn.set_regressors(regressors={'tr_per_month': RandomForestRegressor(), - 'mean_tr': LinearRegression()}) - - bn.fit_parameters(data) - -Now, we can save the model to load it later. - -.. code-block:: python - - bn.save('vk_model.json') - bn.load('vk_model.json') - -Or visualize it (the html won't be rendered in jupyter notebook, but it will be rendered in html file and saved): - -.. code-block:: python - - bn.plot('vk_model.html') diff --git a/docs/source/examples/composite_network_example.rst b/docs/source/examples/composite_network_example.rst deleted file mode 100644 index 435d8b8..0000000 --- a/docs/source/examples/composite_network_example.rst +++ /dev/null @@ -1,39 +0,0 @@ -Learning CompositeBN and sampling from it -========================================= - -Here is a simple working example of how one can learn composite bn, look at the models -that were applied to nodes -and sample some data - -.. 
code-block:: python - - # data reading and preprocessing - data = pd.read_csv(r"data/benchmark/healthcare.csv", index_col=0) - print(data.dtypes) - encoder = preprocessing.LabelEncoder() - p = pp.Preprocessor([("encoder", encoder)]) - - preprocessed_data, _ = p.apply(data) - print(preprocessed_data.head(5)) - - # initialize empty network - bn = CompositeBN() - - info = p.info - - # add initial nodes - bn.add_nodes(info) - - # learn structure - bn.add_edges(data) - - # learn parameters - bn.fit_parameters(data) - - # get info about models in nodes - bn.get_info(as_df=False) - - # sample some data - data_sampled = bn.sample(200) - - print(data_sampled) \ No newline at end of file diff --git a/docs/source/examples/learn_params_vis.rst b/docs/source/examples/learn_params_vis.rst deleted file mode 100644 index 013d20f..0000000 --- a/docs/source/examples/learn_params_vis.rst +++ /dev/null @@ -1,69 +0,0 @@ -Learn and Visualize Bayesian Network -==================================== - -Used imports: - -.. code-block:: python - - import bamt.networks as networks - import bamt.preprocessors as pp - - import pandas as pd - import numpy as np - import matplotlib.pyplot as plt - - from sklearn import preprocessing - from sklearn.ensemble import RandomForestClassifier - from sklearn.neighbors import KNeighborsClassifier - from sklearn.tree import DecisionTreeClassifier - - from pgmpy.estimators import K2Score - -Importing example data: - -.. code-block:: python - - data = pd.read_csv(r'../Data/real data/vk_data.csv') - data - -Choosing a chunk of data: - -.. code-block:: python - - cols = ['age', 'sex', 'has_pets', 'is_parent', 'relation', 'is_driver', 'tr_per_month', 'median_tr', 'mean_tr'] - data = data[cols] - data[['sex', 'has_pets', 'is_parent', 'relation', 'is_driver']] = data[['sex', 'has_pets', 'is_parent', 'relation', 'is_driver']].astype(str) - -Preprocessing data, encode categorical features and discretize numerical features, initialize BN and add nodes: - -.. code-block:: python - - encoder = preprocessing.LabelEncoder() - discretizer = preprocessing.KBinsDiscretizer(n_bins=5, encode='ordinal', strategy='quantile') - - p = pp.Preprocessor([('encoder', encoder), ('discretizer', discretizer)]) - discretized_data, est = p.apply(data) - - bn = networks.HybridBN(has_logit=True, use_mixture=True) # init BN - info = p.info - info - bn.add_nodes(info) - -Learning BN structure and parameters with HillClimbing algorithm: - -.. code-block:: python - - - bn.add_edges(discretized_data, scoring_function=('K2',K2Score)) - bn.set_classifiers(classifiers={'age': DecisionTreeClassifier(), - 'relation': RandomForestClassifier(), - 'is_driver': KNeighborsClassifier(n_neighbors=2)}) - bn.fit_parameters(data) - -Visualize BN structure: - -.. code-block:: python - - bn.plot('bn.html') - -The visualized BN structure will not be rendered by jupyter notebook, but you can see it in the root directory of the project. diff --git a/docs/source/examples/learn_sampling_predict.rst b/docs/source/examples/learn_sampling_predict.rst deleted file mode 100644 index d6b2fd7..0000000 --- a/docs/source/examples/learn_sampling_predict.rst +++ /dev/null @@ -1,89 +0,0 @@ -Sampling and Predicting -======================= - -Used imports: - -.. 
code-block:: python - - import bamt.networks as networks - import bamt.preprocessors as pp - - import pandas as pd - import numpy as np - import matplotlib.pyplot as plt - - from sklearn import preprocessing - from sklearn.metrics import accuracy_score, mean_squared_error - from sklearn.ensemble import RandomForestClassifier - from sklearn.neighbors import KNeighborsClassifier - from sklearn.tree import DecisionTreeClassifier - from sklearn.model_selection import train_test_split - - from pgmpy.estimators import K2Score - -Importing example data: - -.. code-block:: python - - data = pd.read_csv(r'../Data/real data/vk_data.csv') - data - -Choosing a chunk of data: - -.. code-block:: python - - cols = ['age', 'sex', 'has_pets', 'is_parent', 'relation', 'is_driver', 'tr_per_month', 'median_tr', 'mean_tr'] - data = data[cols] - data[['sex', 'has_pets', 'is_parent', 'relation', 'is_driver']] = data[['sex', 'has_pets', 'is_parent', 'relation', 'is_driver']].astype(str) - -Preprocessing data, encode categorical features and discretize numerical features, initialize BN and add nodes: - -.. code-block:: python - - encoder = preprocessing.LabelEncoder() - discretizer = preprocessing.KBinsDiscretizer(n_bins=5, encode='ordinal', strategy='quantile') - - p = pp.Preprocessor([('encoder', encoder), ('discretizer', discretizer)]) - discretized_data, est = p.apply(data) - - bn = HybridBN(has_logit=True, use_mixture=True) # init BN - info = p.info - info - bn.add_nodes(info) - -Learning BN structure with HillClimbing algorithm: - -.. code-block:: python - - - bn.add_edges(discretized_data, scoring_function=('K2',K2Score)) - -Learning distribution parameters and sampling data: - -.. code-block:: python - - bn.fit_parameters(data) - sampled_data = bn.sample(1000) # sample 1000 data points - -It is also possible to set where model should stores the data: - -.. code-block:: python - - bn.sample(1000, models_dir = 'path/to/dir') - -Splitting data into train and test sets and dropping target column: - -.. code-block:: python - - data_test, data_train = train_test_split(data, test_size=0.2, random_state=42) - - data_test = data_test.drop(columns=['tr_per_month']) - -Predicting the target column with BN: - -.. code-block:: python - - bn.fit_parameters(data_train) - - # parall_count is the number of parallel threads to use - predictions = bn.predict(test=data_test, parall_count=4) diff --git a/docs/source/examples/learn_save.rst b/docs/source/examples/learn_save.rst deleted file mode 100644 index 9e9ac8b..0000000 --- a/docs/source/examples/learn_save.rst +++ /dev/null @@ -1,68 +0,0 @@ -Learn and Save Bayesian Network -=============================== - -Used imports: - -.. code-block:: python - - from bamt.preprocessors import Preprocessor - import pandas as pd - from sklearn import preprocessing as pp - from bamt.networks import HybridBN - - -Let's start with data loading and preprocessing: - - -.. 
code-block:: python - - data = pd.read_csv("data/real data/hack_processed_with_rf.csv")[ - ['Tectonic regime', 'Period', 'Lithology', 'Structural setting', - 'Gross', 'Netpay', 'Porosity', 'Permeability', 'Depth']] - - # set encoder and discretizer - encoder = pp.LabelEncoder() - discretizer = pp.KBinsDiscretizer(n_bins=5, encode='ordinal', strategy='uniform') - - # create preprocessor object with encoder and discretizer - p = Preprocessor([('encoder', encoder), ('discretizer', discretizer)]) - - # discretize data for structure learning - discretized_data, est = p.apply(data) - - # get information about data - info = p.info - -Then we create a network object and perform structure and parameters learning: - -.. code-block:: python - - # initialize network object - bn = HybridBN(use_mixture=True, has_logit=True) - - # add nodes to network - bn.add_nodes(info) - - # using mutual information as scoring function for structure learning - bn.add_edges(discretized_data, scoring_function=('MI',)) - - # or use evolutionary algorithm to learn structure - - bn.add_edges(discretized_data, optimizer = 'evo') - - bn.fit_parameters(data) - -To save structure and parameters of the network separately, we can use the following code: - -.. code-block:: python - - # saving structure - bn.save_structure("hack_structure.json") - # saving parameters - bn.save_params("hack_p.json") - -Or, if we want to save the whole network, we can use: - -.. code-block:: python - - bn.save("hack_network.json") diff --git a/docs/source/examples/logger_settings.rst b/docs/source/examples/logger_settings.rst deleted file mode 100644 index 93589f3..0000000 --- a/docs/source/examples/logger_settings.rst +++ /dev/null @@ -1,98 +0,0 @@ -Setting up loggers in BAMT -=============================== - -Used imports: - -.. code-block:: python - - import pandas as pd - - from sklearn import preprocessing as pp - - import bamt.preprocessors as preprocessors - from bamt.networks import ContinuousBN - - from bamt.log import bamt_logger - -There are 2 methods to use: ``switch_console_out`` and ``switch_file_out`` of ``bamt_logger``. - -By default, bamt will print out messages in console and will not use any log files. - -How to turn off/on console output? -_______________________________ - -Let's consider this example: - -.. code-block:: python - - def learn_bn(): - hack_data = pd.read_csv("data/real data/hack_processed_with_rf.csv")[ - [ - "Tectonic regime", - "Period", - "Lithology", - "Structural setting", - "Gross", - "Netpay", - "Porosity", - "Permeability", - "Depth", - ] - ].dropna() - - encoder = pp.LabelEncoder() - discretizer = pp.KBinsDiscretizer(n_bins=5, encode="ordinal", strategy="quantile") - - p = preprocessors.Preprocessor([("encoder", encoder), ("discretizer", discretizer)]) - - discretized_data, est = p.apply(hack_data) - - bn = ContinuousBN() - info = p.info - - bn.add_nodes(info) # here you will get an error - - learn_bn() - # The error: - # 2023-12-14 16:20:05,010 | ERROR | base.py-add_nodes-0090 | Continuous BN does not support discrete data - -Remove output: - -.. code-block:: python - - bamt_logger.switch_console_out(False) - learn_bn() # only KeyError from Python - -After this you will no longer receive messages from all loggers of BAMT. - -To revert changes just use: - -.. code-block:: python - - bamt_logger.switch_console_out(True) - learn_bn() - - # return - # 2023-12-14 16:20:05,010 | ERROR | base.py-add_nodes-0090 | Continuous BN does not support discrete data - -How to turn on/off log files for BAMT? 
-______________________________________ - -In order to redirect errors to log file: - -.. code-block:: python - - bamt_logger.switch_file_out(True, - log_file="") # only absolute path - learn_bn() - # log file - # 2023-12-14 16:34:23,414 | ERROR | base.py-add_nodes-0090 | Continuous BN does not support discrete data - - -To revert this (it will not delete log created before): - -.. code-block:: python - - bamt_logger.switch_file_out(False) # only absolute path - learn_bn() - # log file: no new messages. \ No newline at end of file diff --git a/docs/source/examples/read_structure_and_params.rst b/docs/source/examples/read_structure_and_params.rst deleted file mode 100644 index 7932957..0000000 --- a/docs/source/examples/read_structure_and_params.rst +++ /dev/null @@ -1,32 +0,0 @@ -Read Structure and Distribution Parameters from a File -====================================================== - -Used imports: - -.. code-block:: python - - from bamt.preprocessors import Preprocessor - import pandas as pd - from sklearn import preprocessing as pp - from bamt.networks import HybridBN - import json - -You can read the pre-trained structure and distribution parameters from a file. -This is useful if you do not want to wait for the structure learning every time you run the script or cell. - -Here is an example of how to read structure and distribution parameters from a file: - -.. code-block:: python - - bn = Networks.HybridBN(use_mixture=True, has_logit=True) - - bn.load("network_pretrained.json") - -It is also possible to read structure and distribution parameters separately, if you saved them separately: - -.. code-block:: python - - bn = HybridBN(use_mixture=True, has_logit=True) - - bn.load("network_pretrained_structure.json") - bn.load("network_pretrained_distribution.json") diff --git a/docs/source/examples/read_structure_param_learning.rst b/docs/source/examples/read_structure_param_learning.rst deleted file mode 100644 index 3ce3ba5..0000000 --- a/docs/source/examples/read_structure_param_learning.rst +++ /dev/null @@ -1,52 +0,0 @@ -Read BN Structure from a File, Learn Distribution Parameters -============================================================ - -Used imports: - -.. code-block:: python - - from bamt.preprocessors import Preprocessor - import pandas as pd - from sklearn import preprocessing as pp - from bamt.networks import HybridBN - import json - -There are two options for loading a BN structure. The first is to read it directly from a JSON file: - - -.. code-block:: python - - bn = HybridBN(use_mixture=True, has_logit=True) - - bn2.load("structure.json") - - -The second one is to set it manually using list of edges, but first nodes should be added: - -.. code-block:: python - - encoder = preprocessing.LabelEncoder() - discretizer = preprocessing.KBinsDiscretizer(n_bins=5, encode='ordinal', strategy='quantile') - - p = pp.Preprocessor([('encoder', encoder), ('discretizer', discretizer)]) - discretized_data, est = p.apply(data) - - info = p.info - - bn.add_nodes(info) - - structure = [("Tectonic regime", "Structural setting"), - ("Gross", "Netpay"), - ("Lithology", "Permeability")] - - bn.set_structure(edges=structure) - -The next step is to learn parameters from data, to do this we need to read the data and perform parameters learning: - -.. 
code-block:: python - - # reading data - data = pd.read_csv("data.csv") - - # parameters learning - bn.fit_parameters(data) diff --git a/docs/source/getting_started/cite_us.rst b/docs/source/getting_started/cite_us.rst deleted file mode 100644 index 2ab7f2f..0000000 --- a/docs/source/getting_started/cite_us.rst +++ /dev/null @@ -1,67 +0,0 @@ -Cite us -======= - -These are articles that you can use to cite us or make a reference to us: - - -@misc{BAMT, - author={BAMT}, - title = {Repository experiments and data}, - year = {2021}, - publisher = {GitHub}, - journal = {GitHub repository}, - howpublished = {\url{https://github.com/ITMO-NSS-team/BAMT.git}}, - url = {https://github.com/ITMO-NSS-team/BAMT.git} -} - -@article{deeva2023advanced, - title={Advanced Approach for Distributions Parameters Learning in Bayesian Networks with Gaussian Mixture Models and Discriminative Models}, - author={Deeva, Irina and Bubnova, Anna and Kalyuzhnaya, Anna V}, - journal={Mathematics}, - volume={11}, - number={2}, - pages={343}, - year={2023}, - publisher={MDPI} -} - -@inproceedings{deeva2021oil, - title={Oil and Gas Reservoirs Parameters Analysis Using Mixed Learning of Bayesian Networks}, - author={Deeva, Irina and Bubnova, Anna and Andriushchenko, Petr and Voskresenskiy, Anton and Bukhanov, Nikita and Nikitin, Nikolay O and Kalyuzhnaya, Anna V}, - booktitle={Computational Science--ICCS 2021: 21st International Conference, Krakow, Poland, June 16--18, 2021, Proceedings, Part I}, - pages={394--407}, - year={2021}, - organization={Springer} -} - - - -@article{bubnova2021mixbn, - title={MIxBN: Library for learning Bayesian networks from mixed data}, - author={Bubnova, Anna V and Deeva, Irina and Kalyuzhnaya, Anna V}, - journal={Procedia Computer Science}, - volume={193}, - pages={494--503}, - year={2021}, - publisher={Elsevier} -} - - - -@inproceedings{deeva2020bayesian, - title={Bayesian Networks-based personal data synthesis}, - author={Deeva, Irina and Andriushchenko, Petr D and Kalyuzhnaya, Anna V and Boukhanovsky, Alexander V}, - booktitle={Proceedings of the 6th EAI International Conference on Smart Objects and Technologies for Social Good}, - pages={6--11}, - year={2020} -} - -@article{kaminsky2022bigbravebn, - title={BigBraveBN: algorithm of structural learning for bayesian networks with a large number of nodes}, - author={Kaminsky, Yury and Deeva, Irina}, - journal={Procedia Computer Science}, - volume={212}, - pages={191--200}, - year={2022}, - publisher={Elsevier} -} diff --git a/docs/source/getting_started/contribution.rst b/docs/source/getting_started/contribution.rst deleted file mode 100644 index 740d898..0000000 --- a/docs/source/getting_started/contribution.rst +++ /dev/null @@ -1,127 +0,0 @@ -How to contribute -================= - -We highly encourage you to contribute to the project. You can do this by forking the project on GitHub and sending us a pull request. We will review your code and merge it into the project if it is good. - -Step-by-step guide ------------------- - -If you are new to GitHub, here is a step-by-step guide on how to contribute to the project. - -#. First, fork `the BAMT project on GitHub `__. To do this, click the "Fork" button on the top right of the page. This will create a copy of the project in your own GitHub account. - -#. Clone the repository to your local machine by using `GitHub Desktop `__ or the CLI commnad (make sure that you have git installed): - - .. code-block:: bash - - git clone git@github.com:YourUsername/BAMT.git - cd path/to/repos/BAMT - -#. 
Create a new branch for your changes, it is not recommended to work on the ``master`` branch: - - .. code-block:: bash - - git checkout -b my-new-feature - -#. Make sure that your environment is up to date and set up for development. You can install all the dependencies by running the following command inside the project directory: - - .. code-block:: bash - - pip install -r requirements.txt - -#. Start making changes on your newly created branch, remembering to never work on the ``master`` branch! Work on this copy on your computer using Git to do the version control. - -#. When you're done making changes, check that your changes pass the tests by running the following command inside the project directory or follow `the instructions `__. Note, that you need to have the ``pytest`` package installed: - - .. code-block:: bash - - pip install pytest - pytest -v -s tests - -#. When you are done editing and testing, commit your changes to your local repository with a descriptive message: - - .. code-block:: bash - - git add modified_files - git commit -am "Added some feature" - -#. Push your local changes to the remote repository on GitHub into your branch: - - .. code-block:: bash - - git push origin my-new-feature - -Finally, go to the web page of your fork of the BAMT repo, and click 'Pull Request' (PR) to send your changes to the maintainers for review. - -If the following instructions look confusing, check `git documentation `__ or use GitHub Desktop with GUI. -Using GitHUb extension for Visual Studio Code, PyCharm or whatever IDE you use is also a good option. - -Before submitting a pull request --------------------------------- - -Before you submit a pull request for your contribution, please work through this checklist to make sure that you have done everything necessary so we can efficiently review and accept your changes. - -If your contribution changes BAMT code in any way, please follow the check list below: - - - Update the `documentation `__ to reflect the changes. - - - Update the `tests `__ - - - Update the `README.rst `__ file if the change affects description of BAMT. - - - Make sure that your code is properly formatted according to the `PEP8 `__ standard. You can automatically format your code using the ``autopep8`` package. - - - Make sure your commits are atomic (one feature per commit) and that you have written a descriptive commit message. - - If your contribution requires a new dependency, please make sure that you have added it to the ``requirements.txt`` file and follow these additional steps: - - - Double-check that the new dependency is easy to install via ``pip`` or ``conda`` and supports Python 3. If the dependency requires a complicated installation, then we most likely won't merge your changes because we want to keep BAMT easy to install. - - - Add the required version of the library to `requirements.txt `__ - - -Contribute to the documentation -------------------------------- -Take care of the documentation. - -All the documentation is created with the Sphinx autodoc feature. Use .. -automodule:: section which describes all the code in the module. - -- If a new package with several scripts: - - #. Go to `docs/source/api `__ and create new your_name_for_file.rst file. - - #. Add a Header underlined with “=” sign. It’s crucial. - - #. Add automodule description for each of your scripts - - .. code-block:: - - $.. automodule:: bamt.your.first.script.path - $ :members: - $ :undoc-members: - $ :show-inheritance: - - $.. 
automodule:: bamt.your.second.script.path - $ :members: - $ :undoc-members: - $ :show-inheritance: - - #. Add your_name_for_file to the toctree at docs/index.rst - -- If a new module to the existed package: - - Most of the sections are already described in `docs/source/api `__ , so you can: - - - choose the most appropriate and repeat 3-d step from the previous section. - - or create a new one and repeat 2-3 steps from the previous section. - -- If a new function or a class to the existing module: - - Be happy. Everything is already done for you. - - -Acknowledgements ----------------- - -This guide document is based at well-written `TPOT Framework contribution guide `__ and `FEDOT Framework contribution guide `__. \ No newline at end of file diff --git a/docs/source/getting_started/faq.rst b/docs/source/getting_started/faq.rst deleted file mode 100644 index ebe5898..0000000 --- a/docs/source/getting_started/faq.rst +++ /dev/null @@ -1,30 +0,0 @@ -.. _faq: - -FAQ -=== -1. On ``calculate_weights`` I got the following: - -.. code-block:: python - - assert np.all([A.dtype == "int" for A in Symbol_matrices]) - AssertionError - -What should I do? - -Answer: - | Because of not so clear dtypes policies, ``pyitlib`` need all integer columns - | as int type (col.dtype must return 'str'). So to fix you can do: -Instead of: - -.. code-block:: python - - bn.calculate_weights(discretized_data) - - -Convert dtypes to intc: - -.. code-block:: python - - bn.calculate_weights(discretized_data.astype(np.intc)) - - diff --git a/docs/source/getting_started/install.rst b/docs/source/getting_started/install.rst deleted file mode 100644 index d62b10c..0000000 --- a/docs/source/getting_started/install.rst +++ /dev/null @@ -1,32 +0,0 @@ -.. _install: - -Installation -============ - -The easiest way to get BAMT is through pip using the command - -.. code-block:: bash - - pip install bamt - -Since BAMT has LightGBM in it's dependencies it is required to install gcc and cmake -to compile LightGBM for Unix-based systems. - -On Linux you can do that with any package manager you prefer: - - -.. code-block:: bash - - sudo apt-get install gcc cmake - pacman -S gcc cmake - - -On macOS you can use homebrew: - -.. code-block:: bash - - brew install gcc cmake - -To learn more about Windows installation of LightGBM or other details, -please follow -`official LightGBM documentation page `__. diff --git a/docs/source/index.rst b/docs/source/index.rst deleted file mode 100644 index fef4498..0000000 --- a/docs/source/index.rst +++ /dev/null @@ -1,103 +0,0 @@ -.. BAMT documentation master file, created by - sphinx-quickstart on Thu Jan 19 21:13:42 2023. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - -Welcome to BAMT's documentation! -================================ - -The site contains documentation for the `BAMT framework `__. - -BAMT - Bayesian Analytical and Modelling Toolkit. -This repository contains a data modeling and analysis tool based on Bayesian networks. -It can be divided into two main parts - -algorithms for constructing and training Bayesian networks on data and algorithms for applying Bayesian networks for filling gaps, -generating synthetic data, assessing edges strength e.t.c. - -.. image:: ../images/bamt_readme_scheme.png - :target: ../images/bamt_readme_scheme.png - :align: center - -.. 
toctree:: - :maxdepth: 1 - :hidden: - :caption: Getting Started - - self - getting_started/install.rst - getting_started/contribution.rst - getting_started/faq.rst - getting_started/cite_us.rst - - -.. toctree:: - :maxdepth: 1 - :hidden: - :caption: API - - api/builders.rst - api/mi_entropy_gauss.rst - api/networks.rst - api/nodes.rst - api/preprocess.rst - api/preprocessors.rst - api/redef_HC.rst - api/redef_info_scores.rst - api/utils.rst - -.. toctree:: - :maxdepth: 1 - :hidden: - :caption: Models - - models/bayesiannetworks/base_network.rst - models/bayesiannetworks/discrete_bn.rst - models/bayesiannetworks/continuous_bn.rst - models/bayesiannetworks/composite_bn.rst - models/bayesiannetworks/hybrid_bn.rst - models/bayesiannetworks/sampling_predicting.rst - models/bayesiannetworks/large_bn_algs.rst - -.. toctree:: - :maxdepth: 1 - :hidden: - :caption: BN Theory - - bnalgs/bn_learning.rst - bnalgs/big_bns.rst - -.. toctree:: - :maxdepth: 1 - :hidden: - :caption: Data - - data_section/data_management.rst - -.. toctree:: - :maxdepth: 1 - :hidden: - :caption: Examples, workflow pipelines - - examples/learn_save.rst - examples/learn_params_vis.rst - examples/read_structure_param_learning.rst - examples/read_structure_and_params.rst - examples/learn_sampling_predict.rst - examples/add_ml_models_to_nodes.rst - examples/composite_network_example.rst - -.. toctree:: - :maxdepth: 1 - :hidden: - :caption: Tutorials - - tutorials/tutorials_gists.rst - - - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` diff --git a/docs/source/models/bayesiannetworks/base_network.rst b/docs/source/models/bayesiannetworks/base_network.rst deleted file mode 100644 index 37a67d5..0000000 --- a/docs/source/models/bayesiannetworks/base_network.rst +++ /dev/null @@ -1,78 +0,0 @@ -Bayesian Networks -================= - -BaseNetwork class, Hill Climbing and Evolutionary Algorithms ------------------------------------------------------------- - -BaseNetwork class -~~~~~~~~~~~~~~~~~ - -All three BN types are based on an abstract class ``BaseNetwork``. -This class provides the basic functions for all BN types. -The three BN types are ``DiscreteBN``, ``ContinuousBN`` and ``HybridBN``. -The ``HybridBN`` is a BN that contains both discrete and continuous variables. -The ``DiscreteBN`` and ``ContinuousBN`` are two BN types that are used to represent the BNs that contain only discrete or continuous variables, respectively. - -.. autoclass:: bamt.networks.BaseNetwork - :members: - :no-undoc-members: - -Hill Climbing and Evolutionary Algorithms -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Currently BAMT employs Hill Climbing and Evolutionary Algorithms to learn the structure of the BNs. To use them, -you need to specify the ``optimizer`` parameter in ``add_edges`` method. Here is an example: - -For Example: - -.. 
code-block:: python - - import bamt.networks as networks - import bamt.preprocessors as pp - import pandas as pd - - asia = pd.read_csv('data.csv') - encoder = preprocessing.LabelEncoder() - discretizer = preprocessing.KBinsDiscretizer(n_bins=5, encode='ordinal', strategy='quantile') - - p = pp.Preprocessor([('encoder', encoder), ('discretizer', discretizer)]) - discretized_data, est = p.apply(asia) - - bn = networks.DiscreteBN() - info = p.info - info - - # add edges using Hill Climbing - bn.add_edges(discretized_data, optimizer='HC') - # add edges using Evolutionary Algorithm - bn.add_edges(discretized_data, optimizer='Evo') - - - -Evolutionary Algorithm has these additional parameters: - - :param data: The data from which to build the structure. - :type data: DataFrame - :param classifier: A classification model for discrete nodes, defaults to None. - :type classifier: Optional[object] - :param regressor: A regression model for continuous nodes, defaults to None. - :type regressor: Optional[object] - - :Keyword Args: - * *init_nodes* (list) -- Initial nodes to be included in the population. - * *max_arity* (int) -- Maximum arity for the evolutionary algorithm. - * *timeout* (int) -- Timeout for the evolutionary algorithm in minutes. - * *pop_size* (int) -- Population size for the evolutionary algorithm. - * *crossover_prob* (float) -- Crossover probability for the evolutionary algorithm. - * *mutation_prob* (float) -- Mutation probability for the evolutionary algorithm. - * *custom_mutations* (list) -- Custom mutation types for the evolutionary algorithm. - * *custom_crossovers* (list) -- Custom crossover types for the evolutionary algorithm. - * *selection_type* (SelectionTypesEnum) -- Selection type for the evolutionary algorithm. - * *blacklist* (list) -- Blacklist for the evolutionary algorithm. - * *whitelist* (list) -- Whitelist for the evolutionary algorithm. - * *custom_constraints* (list) -- Custom constraints for the evolutionary algorithm. - * *custom_metric* (function) -- Custom objective metric for the evolutionary algorithm. - - The resulting structure is stored in the `skeleton` attribute of the `EvoStructureBuilder` object. - -HillClimbing parameters are described below in DiscreteBN, ContinuousBN and HybridBN sections. \ No newline at end of file diff --git a/docs/source/models/bayesiannetworks/composite_bn.rst b/docs/source/models/bayesiannetworks/composite_bn.rst deleted file mode 100644 index 638f8de..0000000 --- a/docs/source/models/bayesiannetworks/composite_bn.rst +++ /dev/null @@ -1,63 +0,0 @@ -Composite Bayesian Networks ------------------------- - -.. autoclass:: bamt.networks.composite_bn.CompositeBN - :members: - :no-undoc-members: - -Network initialization -~~~~~~~~~~~~~~~~~~~~~~ - -If the dataset contains both discrete and continuous variables, ``CompositeBN`` is can be used. -To initialize a ``CompositeBN`` object, you can use the following code: - -.. code-block:: python - - import bamt.networks as networks - - bn = networks.CompositeBN() - - -Data Preprocessing -~~~~~~~~~~~~~~~~~~ - -Before applying any structure or parametric learning, the data should be preprocessed as follows: - -.. 
code-block:: python - - import bamt.Preprocessor as pp - import pandas as pd - from sklearn import preprocessing - - data = pd.read_csv("path/to/data") - encoder = preprocessing.LabelEncoder() - p = pp.Preprocessor([("encoder", encoder)]) - - preprocessed_data, _ = p.apply(data) - - - -Structure Learning -~~~~~~~~~~~~~~~~~~ - -For structure learning of Composite BNs, ``bn.add_nodes()`` and ``bn.add_edges()`` methods are used. -Data should be non-preprocessed when passed to ``bn.add_edges()`` - -.. code-block:: python - - info = p.info - - bn.add_nodes(info) - - bn.add_edges(data) # !!! non-preprocessed - - -Parametric Learning -~~~~~~~~~~~~~~~~~~~ - -For parametric learning of continuous BNs, ``bn.fit_parameters()`` method is used. - -.. code-block:: python - - bn.fit_parameters(data) # !!! non-preprocessed - bn.get_info() diff --git a/docs/source/models/bayesiannetworks/continuous_bn.rst b/docs/source/models/bayesiannetworks/continuous_bn.rst deleted file mode 100644 index e91521a..0000000 --- a/docs/source/models/bayesiannetworks/continuous_bn.rst +++ /dev/null @@ -1,90 +0,0 @@ -Continuous Bayesian Networks ----------------------------- - -.. autoclass:: bamt.networks.continuous_bn.ContinuousBN - :members: - :no-undoc-members: - -Network initialization -~~~~~~~~~~~~~~~~~~~~~~ - -If all the variables in dataset are continuous, ``ContinuousBN`` is recommended to use. -To initialize a ``ContinuousBN`` object, you can use the following code: - -.. code-block:: python - - import bamt.networks as networks - - bn = networks.ContinuousBN(use_mixture=True) - -ContinuousBN has an additional parameter ``use_mixture``. -It is used to determine whether to use mixuters of Gaussian distributions to represent the conditional distribution of continuous variables. -If ``use_mixture`` is ``True``, mixuters of Gaussian distributions are used to represent the conditional distribution of continuous variables. - - -Data Preprocessing -~~~~~~~~~~~~~~~~~~ - -If the dataset contains ``integer`` values that should be treated as continuous variables (e.g. 1, 2 etc), they should be converted to ``float``. -Before applying any structure or parametric learning, the data should be preprocessed as follows: - -.. code-block:: python - - import bamt.Preprocessor as pp - import pandas as pd - from sklearn import preprocessing - - data = pd.read_csv('data.csv') - - encoder = preprocessing.LabelEncoder() - discretizer = preprocessing.KBinsDiscretizer(n_bins=5, encode='ordinal', strategy='quantile') - - p = pp.Preprocessor([('encoder', encoder), ('discretizer', discretizer)]) - discretized_data, est = p.apply(data) - - info = p.info - - - -Structure Learning -~~~~~~~~~~~~~~~~~~ - -For structure learning of continuous BNs, ``bn.add_nodes()`` and ``bn.add_edges()`` methods are used. - -.. code-block:: python - - from pgmpy.estimators import K2Score - - bn.add_nodes(info) # add nodes from info obtained from preprocessing - - bn.get_info() # to make sure that the network recognizes the variables as continuous - - params = { - # Defines initial nodes of the network, list of node names - 'init_nodes':[...] - # Defines initial edges of the network, list of tuples (node1, node2) - 'init_edges':[...] - # Strictly set edges where algoritm must learn, list of tuples (node1, node2) - 'white_list':[...] - # blacklist edges, list of tuples (node1, node2) - 'bl_add':[...] 
- # Allow algorithm to remove edges defined by user, bool - 'remove_init_edges':True - } - - # Structure learning using K2Score and parameters defined above - bn.add_edges(discretized_data, scoring_function=('K2', K2Score), params=params) - - bn.plot('foo.html') # add nodes from info obtained from preprocessing - - -Parametric Learning -~~~~~~~~~~~~~~~~~~~ - -For parametric learning of BNs, ``bn.fit_parameters()`` method is used. - -.. code-block:: python - - bn.fit_parameters(data) - - bn.get_info() # get information table about the network \ No newline at end of file diff --git a/docs/source/models/bayesiannetworks/discrete_bn.rst b/docs/source/models/bayesiannetworks/discrete_bn.rst deleted file mode 100644 index af6943a..0000000 --- a/docs/source/models/bayesiannetworks/discrete_bn.rst +++ /dev/null @@ -1,85 +0,0 @@ -Discrete Bayesian Networks --------------------------- - -.. autoclass:: bamt.networks.DiscreteBN - :members: - :no-undoc-members: - -Network initialization -~~~~~~~~~~~~~~~~~~~~~~ - -If all the variables in dataset are discrete, ``DiscreteBN`` is recommended to use. -To initialize a ``DiscreteBN`` object, you can use the following code: - -.. code-block:: python - - import bamt.networks as networks - - bn = networks.DiscreteBN() - -Data Preprocessing -~~~~~~~~~~~~~~~~~~ - -If the dataset contains ``float`` values (e.g. 1.0, 2.0 etc), they should be converted to ``integers`` or discretized before using ``DiscreteBN``. -Before applying any structure or parametric learning, the data should be preprocessed as follows: - -.. code-block:: python - - import bamt.Preprocessor as pp - import pandas as pd - from sklearn import preprocessing - - data = pd.read_csv('data.csv') - - encoder = preprocessing.LabelEncoder() - discretizer = preprocessing.KBinsDiscretizer(n_bins=5, encode='ordinal', strategy='quantile') - - p = pp.Preprocessor([('encoder', encoder), ('discretizer', discretizer)]) - discretized_data, est = p.apply(data) - - info = p.info - - - -Structure Learning -~~~~~~~~~~~~~~~~~~ - -For structure learning of discrete BNs, ``bn.add_nodes()`` and ``bn.add_edges()`` methods should be used. - -.. code-block:: python - - from pgmpy.estimators import K2Score - - bn.add_nodes(info) # add nodes from info obtained from preprocessing - - bn.get_info() # to make sure that the network recognizes the variables as discrete - - params = { - # Defines initial nodes of the network, list of node names - 'init_nodes':[...] - # Defines initial edges of the network, list of tuples (node1, node2) - 'init_edges':[...] - # Strictly set edges where algoritm must learn, list of tuples (node1, node2) - 'white_list':[...] - # blacklist edges, list of tuples (node1, node2) - 'bl_add':[...] - # Allow algorithm to remove edges defined by user, bool - 'remove_init_edges':True - } - - # Structure learning using K2Score and parameters defined above - bn.add_edges(discretized_data, scoring_function=('K2', K2Score), params=params) - - bn.plot('foo.html') # Plot the network, save it to foo.html, NOT rendered in notebook - - -Parametric Learning -~~~~~~~~~~~~~~~~~~~ - -For parametric learning of discrete BNs, ``bn.fit_parameters()`` method is used. - -.. 
code-block:: python - - bn.fit_parameters(data) - - bn.get_info() # get information table about the network diff --git a/docs/source/models/bayesiannetworks/hybrid_bn.rst b/docs/source/models/bayesiannetworks/hybrid_bn.rst deleted file mode 100644 index cf0ada5..0000000 --- a/docs/source/models/bayesiannetworks/hybrid_bn.rst +++ /dev/null @@ -1,88 +0,0 @@ -Hybrid Bayesian Networks ------------------------- - -.. autoclass:: bamt.networks.hybrid_bn.HybridBN - :members: - :no-undoc-members: - -Network initialization -~~~~~~~~~~~~~~~~~~~~~~ - -If the dataset contains both discrete and continuous variables, ``HybridBN`` is recommended to use. -To initialize a ``HybridBN`` object, you can use the following code: - -.. code-block:: python - - import bamt.networks as networks - - bn = networks.HybridBN(has_logit=True, use_mixture=True) - -HybridBN has two additional parameters ``has_logit`` and ``use_mixture``. -``has_logit`` is used to determine whether to use logit nodes. Logit nodes use machine learning algorithms to represent variable. -Logit nodes are discrete nodes that have continuous root nodes; classification models are used to model conditional distributions in such nodes. - - - -Data Preprocessing -~~~~~~~~~~~~~~~~~~ - -Before applying any structure or parametric learning, the data should be preprocessed as follows: - -.. code-block:: python - - import bamt.Preprocessor as pp - import pandas as pd - from sklearn import preprocessing - - data = pd.read_csv('data.csv') - - encoder = preprocessing.LabelEncoder() - discretizer = preprocessing.KBinsDiscretizer(n_bins=5, encode='ordinal', strategy='quantile') - - p = pp.Preprocessor([('encoder', encoder), ('discretizer', discretizer)]) - discretized_data, est = p.apply(data) - - info = p.info - - - -Structure Learning -~~~~~~~~~~~~~~~~~~ - -For structure learning of Hybrid BNs, ``bn.add_nodes()`` and ``bn.add_edges()`` methods are used. - -.. code-block:: python - - from pgmpy.estimators import K2Score - - bn.add_nodes(info) # add nodes from info obtained from preprocessing - - params = { - # Defines initial nodes of the network, list of node names - 'init_nodes':[...] - # Defines initial edges of the network, list of tuples (node1, node2) - 'init_edges':[...] - # Strictly set edges where algoritm must learn, list of tuples (node1, node2) - 'white_list':[...] - # blacklist edges, list of tuples (node1, node2) - 'bl_add':[...] - # Allow algorithm to remove edges defined by user, bool - 'remove_init_edges':True - } - - # Structure learning using K2Score and parameters defined above - bn.add_edges(discretized_data, scoring_function=('K2', K2Score), params=params) - - bn.plot('foo.html') # add nodes from info obtained from preprocessing - - -Parametric Learning -~~~~~~~~~~~~~~~~~~~ - -For parametric learning of continuous BNs, ``bn.fit_parameters()`` method is used. - -.. code-block:: python - - bn.fit_parameters(data) - - bn.get_info() # get information table about the network diff --git a/docs/source/models/bayesiannetworks/large_bn_algs.rst b/docs/source/models/bayesiannetworks/large_bn_algs.rst deleted file mode 100644 index b4fc4fd..0000000 --- a/docs/source/models/bayesiannetworks/large_bn_algs.rst +++ /dev/null @@ -1,59 +0,0 @@ -Algorithms for Large Bayesian Networks --------------------------------------- - - -BigBraveBN -~~~~~~~~~~ - -BigBraveBN is an algorithm that is used for structure learning of large Bayesian networks. 
-It restricts the search space by using Brave coefficient, that represents mutual occurrence of two variables in groups. -These groups are formed for each variable using kNN algorithm that searches nearest neighbors for each variable. -Mutual information score is used as metric for nearest neighbors algorithm. - - -.. autoclass:: bamt.networks.BigBraveBN - :members: - :no-undoc-members: - - - -BigBraveBN initialization and usage -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - -To use BigBraveBN, just follow typical structure learning procedure with one difference: use ``BigBraveBN`` to generate ``white_list``. - -First, initialize ``BigBraveBN`` object and generate possible edges list: - -.. code-block:: python - - space_restrictor = BigBraveBN() - - space_restrictor.set_possible_edges_by_brave( - df = data) - - ps = space_restrictor.possible_edges - -Then, preprocess the data: - -.. code-block:: python - - encoder = preprocessing.LabelEncoder() - discretizer = preprocessing.KBinsDiscretizer(n_bins=5, encode='ordinal', strategy='uniform') - - p = pp.Preprocessor([('encoder', encoder), ('discretizer', discretizer)]) - discretized_data, est = p.apply(data) - - info = p.info - -Then perform structure learning as usual, but use ``ps`` as ``white_list``: - -.. code-block:: python - - bn = networks.ContinuousBN() - - bn.add_nodes(descriptor=info) - - params = {'white_list': ps} - - bn.add_edges(discretized_data, scoring_function=('K2',K2Score), params=params) diff --git a/docs/source/models/bayesiannetworks/models_storing.rst b/docs/source/models/bayesiannetworks/models_storing.rst deleted file mode 100644 index 0390e90..0000000 --- a/docs/source/models/bayesiannetworks/models_storing.rst +++ /dev/null @@ -1,4 +0,0 @@ -Models Storing -============== - - diff --git a/docs/source/models/bayesiannetworks/sampling_predicting.rst b/docs/source/models/bayesiannetworks/sampling_predicting.rst deleted file mode 100644 index c34d5f2..0000000 --- a/docs/source/models/bayesiannetworks/sampling_predicting.rst +++ /dev/null @@ -1,26 +0,0 @@ -Sampling and Prediction with Bayesian Networks ----------------------------------------------- - -Sampling with Bayesian Networks -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -For data sampling from any BNs, ``bn.sample()`` method is used, but the network should be parametrically fitted first. - -.. code-block:: python - - bn.fit_parameters(data) - sampled_data = bn.sample(1000) # sample 1000 data points - - - -Predicting with Bayesian Networks -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -For prediction with any BNs, ``bn.predict()`` method is used, but the network should be also parametrically fitted first. - -.. code-block:: python - - bn.fit_parameters(data_train) - - # parall_count is the number of parallel threads to use - predictions = bn.predict(test=data_test, parall_count=4) diff --git a/docs/source/tutorials/tutorials_gists.rst b/docs/source/tutorials/tutorials_gists.rst deleted file mode 100644 index bd49907..0000000 --- a/docs/source/tutorials/tutorials_gists.rst +++ /dev/null @@ -1,8 +0,0 @@ -Full-scale tutorial examples -============================ - -In this section full-scale GitHub Gist tutorials for BAMT use cases are listed, -feel free to leave your comments and questions at the following gists: - -* `BAMT house price dataset prediction results improvement with sampling. `__ -* `BAMT house price dataset linear regression with BAMT. 
`__ From 158e03cfeb8c72b0a71aa4abb4f07d078d5144a9 Mon Sep 17 00:00:00 2001 From: jrzkaminski Date: Fri, 5 Jul 2024 16:12:01 +0300 Subject: [PATCH 08/15] remove old documentation --- other_requirements/readthedocs.txt | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 other_requirements/readthedocs.txt diff --git a/other_requirements/readthedocs.txt b/other_requirements/readthedocs.txt deleted file mode 100644 index 2f8da1a..0000000 --- a/other_requirements/readthedocs.txt +++ /dev/null @@ -1,19 +0,0 @@ -myst-parser -sphinx -sphinx_rtd_theme -readthedocs-sphinx-search -sphinxcontrib-details-directive -autodocsumm -networkx -matplotlib -pandas -numpy -tqdm -pyvis -pgmpy -setuptools -gmr -scikit-learn -scipy -missingno -pytest \ No newline at end of file From 20a0ce3dd34bf1bec27cb9ec45b20529a96bc745 Mon Sep 17 00:00:00 2001 From: jrzkaminski Date: Fri, 5 Jul 2024 16:13:30 +0300 Subject: [PATCH 09/15] move all images to docs --- README.md | 4 ++-- {img => docs/images}/BN-1.png | Bin {img => docs/images}/BN_gif.gif | Bin {img => docs/images}/K2.png | Bin {img => docs/images}/MI.png | Bin {img => docs/images}/concept.png | Bin {img => docs/images}/formula1.jpg | Bin {img => docs/images}/formula2.jpg | Bin {img => docs/images}/formula3.jpg | Bin {img => docs/images}/gender.jpg | Bin {img => docs/images}/interest.jpg | Bin {img => docs/images}/likes.jpg | Bin {img => docs/images}/modules_scheme.png | Bin {img => docs/images}/pseudocode.jpg | Bin {img => docs/images}/sampling.jpg | Bin {img => docs/images}/srmse.jpg | Bin {img => docs/images}/synth_gen.png | Bin 17 files changed, 2 insertions(+), 2 deletions(-) rename {img => docs/images}/BN-1.png (100%) rename {img => docs/images}/BN_gif.gif (100%) rename {img => docs/images}/K2.png (100%) rename {img => docs/images}/MI.png (100%) rename {img => docs/images}/concept.png (100%) rename {img => docs/images}/formula1.jpg (100%) rename {img => docs/images}/formula2.jpg (100%) rename {img => docs/images}/formula3.jpg (100%) rename {img => docs/images}/gender.jpg (100%) rename {img => docs/images}/interest.jpg (100%) rename {img => docs/images}/likes.jpg (100%) rename {img => docs/images}/modules_scheme.png (100%) rename {img => docs/images}/pseudocode.jpg (100%) rename {img => docs/images}/sampling.jpg (100%) rename {img => docs/images}/srmse.jpg (100%) rename {img => docs/images}/synth_gen.png (100%) diff --git a/README.md b/README.md index 3268555..ea2538c 100644 --- a/README.md +++ b/README.md @@ -47,11 +47,11 @@ The following algorithms for Bayesian Networks learning are implemented: - Non-parametric learning of distributions with various user-specified regression and classification models. - The algorithm for structural training of large Bayesian networks (> 10 nodes) is based on local training of small networks with their subsequent algorithmic connection. -![bn example gif](img/BN_gif.gif) +![bn example gif](docs/images/BN_gif.gif) For example, in terms of data analysis and modeling using Bayesian networks, a pipeline has been implemented to generate synthetic data by sampling from Bayesian networks. 
-![synthetics generation](img/synth_gen.png)
+![synthetics generation](docs/images/synth_gen.png)
 
 ## How to use
 
diff --git a/img/BN-1.png b/docs/images/BN-1.png
similarity index 100%
rename from img/BN-1.png
rename to docs/images/BN-1.png
diff --git a/img/BN_gif.gif b/docs/images/BN_gif.gif
similarity index 100%
rename from img/BN_gif.gif
rename to docs/images/BN_gif.gif
diff --git a/img/K2.png b/docs/images/K2.png
similarity index 100%
rename from img/K2.png
rename to docs/images/K2.png
diff --git a/img/MI.png b/docs/images/MI.png
similarity index 100%
rename from img/MI.png
rename to docs/images/MI.png
diff --git a/img/concept.png b/docs/images/concept.png
similarity index 100%
rename from img/concept.png
rename to docs/images/concept.png
diff --git a/img/formula1.jpg b/docs/images/formula1.jpg
similarity index 100%
rename from img/formula1.jpg
rename to docs/images/formula1.jpg
diff --git a/img/formula2.jpg b/docs/images/formula2.jpg
similarity index 100%
rename from img/formula2.jpg
rename to docs/images/formula2.jpg
diff --git a/img/formula3.jpg b/docs/images/formula3.jpg
similarity index 100%
rename from img/formula3.jpg
rename to docs/images/formula3.jpg
diff --git a/img/gender.jpg b/docs/images/gender.jpg
similarity index 100%
rename from img/gender.jpg
rename to docs/images/gender.jpg
diff --git a/img/interest.jpg b/docs/images/interest.jpg
similarity index 100%
rename from img/interest.jpg
rename to docs/images/interest.jpg
diff --git a/img/likes.jpg b/docs/images/likes.jpg
similarity index 100%
rename from img/likes.jpg
rename to docs/images/likes.jpg
diff --git a/img/modules_scheme.png b/docs/images/modules_scheme.png
similarity index 100%
rename from img/modules_scheme.png
rename to docs/images/modules_scheme.png
diff --git a/img/pseudocode.jpg b/docs/images/pseudocode.jpg
similarity index 100%
rename from img/pseudocode.jpg
rename to docs/images/pseudocode.jpg
diff --git a/img/sampling.jpg b/docs/images/sampling.jpg
similarity index 100%
rename from img/sampling.jpg
rename to docs/images/sampling.jpg
diff --git a/img/srmse.jpg b/docs/images/srmse.jpg
similarity index 100%
rename from img/srmse.jpg
rename to docs/images/srmse.jpg
diff --git a/img/synth_gen.png b/docs/images/synth_gen.png
similarity index 100%
rename from img/synth_gen.png
rename to docs/images/synth_gen.png

From 69a91e8a915773f89939578e8b66437886777707 Mon Sep 17 00:00:00 2001
From: jrzkaminski
Date: Fri, 5 Jul 2024 17:38:13 +0300
Subject: [PATCH 10/15] minor changes

---
 bamt/core/node_models/__init__.py             |   4 +
 .../node_models/continuous_distribution.py    | 207 +++++++++++++++++-
 bamt/core/nodes/__init__.py                   |   2 +
 bamt/core/nodes/child_nodes/__init__.py       |   2 +
 bamt/core/nodes/child_nodes/child_node.py     |   4 +-
 bamt/core/nodes/node.py                       |  18 +-
 bamt/core/nodes/root_nodes/__init__.py        |   2 +
 bamt/core/nodes/root_nodes/continuous_node.py |  21 +-
 bamt/core/nodes/root_nodes/root_node.py       |   8 +-
 9 files changed, 253 insertions(+), 15 deletions(-)

diff --git a/bamt/core/node_models/__init__.py b/bamt/core/node_models/__init__.py
index e69de29..f9da437 100644
--- a/bamt/core/node_models/__init__.py
+++ b/bamt/core/node_models/__init__.py
@@ -0,0 +1,4 @@
+from .continuous_distribution import ContinuousDistribution
+from .classifier import Classifier
+from .empirical_distribution import EmpiricalDistribution
+from .regressor import Regressor
diff --git a/bamt/core/node_models/continuous_distribution.py b/bamt/core/node_models/continuous_distribution.py
index 9d31e21..6a0df6c 100644
--- a/bamt/core/node_models/continuous_distribution.py
+++ b/bamt/core/node_models/continuous_distribution.py @@ -1,22 +1,209 @@ import numpy as np +from scipy import stats +from scipy.stats import rv_continuous +from scipy.special import kl_div +from typing import Tuple, Optional, List, Type, Dict +from enum import Enum -from .distribution import Distribution +# Get all continuous distributions from scipy.stats +_CONTINUOUS_DISTRIBUTIONS = [ + getattr(stats, name) + for name in dir(stats) + if isinstance(getattr(stats, name), stats.rv_continuous) +] -class ContinuousDistribution(Distribution): - def __init__(self, distribution_model=None, **parameters): +class DistributionPool(Enum): + """ + Enum for selecting the distribution pool. + """ + + SMALL = "small" + LARGE = "large" + CUSTOM = "custom" + + +# noinspection PyPep8Naming +class ContinuousDistribution: + """ + Class for continuous distributions. + This class is a wrapper for continuous distributions from `scipy.stats` module, + however, any custom continuous distribution can be used, as long as it implements + `scipy.stats` interface. + Example Usage: + ```python + data = np.random.normal(0, 1, 1000) + dist = ContinuousDistribution() + dist.fit(data, distributions_pool=DistributionPool.SMALL) + samples = dist.sample(10) + ``` + """ + + SMALL_POOL: Tuple[Type[stats.rv_continuous], ...] = ( + stats.norm, + stats.laplace, + stats.t, + stats.uniform, + stats.rayleigh, + ) + + LARGE_POOL: List[Type[stats.rv_continuous]] = _CONTINUOUS_DISTRIBUTIONS + + def __init__( + self, + distribution_model: Optional[Type[stats.rv_continuous]] = None, + **parameters, + ) -> None: + """ + Initialize the ContinuousDistribution with an optional distribution model and parameters. + + Args: + distribution_model (Optional[Type[stats.rv_continuous]]): A specific `scipy.stats` distribution. + **parameters: Parameters for the specified distribution model. + """ self._distribution = distribution_model self._parameters = parameters - def fit(self, X: np.ndarray) -> None: + def fit( + self, + X: np.ndarray, + distributions_pool: DistributionPool = DistributionPool.SMALL, + custom_pool: Optional[List[Type[stats.rv_continuous]]] = None, + ) -> None: + """ + Fit the data to the best distribution within the specified pool. + + Args: + X (np.ndarray): The data to fit. + distributions_pool (DistributionPool): The pool of distributions to consider (small, large, custom). + custom_pool (Optional[List[Type[stats.rv_continuous]]]): if `DistributionPool.CUSTOM` is selected. + + Raises: + ValueError: If a custom pool is selected but not provided. + """ if self._distribution is None: - # TODO: implement an algorithm that finds a distribution and fits it with chosen parameters - pass + pool = self._select_pool(distributions_pool, custom_pool) + self._distribution, self._parameters = self._fit_best_distribution(X, pool) + else: + self._parameters = self._distribution.fit(X) + + @staticmethod + def _select_pool( + pool_type: DistributionPool, + custom_pool: Optional[List[Type[stats.rv_continuous]]], + ) -> List[Type[stats.rv_continuous]]: + """ + Select the appropriate pool of distributions. + + Args: + pool_type (DistributionPool): The type of pool to select. + custom_pool (Optional[List[Type[stats.rv_continuous]]]): The custom pool of distributions. + + Returns: + List[Type[stats.rv_continuous]]: The selected pool of distributions. + + Raises: + ValueError: If a custom pool is selected but not provided. 
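+
+        Illustrative sketch (a hypothetical doctest, assuming only the pools
+        defined above):
+            >>> pool = ContinuousDistribution._select_pool(
+            ...     DistributionPool.SMALL, None
+            ... )
+            >>> len(pool) == len(ContinuousDistribution.SMALL_POOL)
+            True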
+ """ + if pool_type == DistributionPool.SMALL: + return list(ContinuousDistribution.SMALL_POOL) + elif pool_type == DistributionPool.LARGE: + return ContinuousDistribution.LARGE_POOL + elif pool_type == DistributionPool.CUSTOM: + if custom_pool is not None: + return custom_pool + else: + raise ValueError("Custom pool selected but no custom pool provided") else: - pass + raise ValueError("Invalid distribution pool type") + + @staticmethod + def _fit_best_distribution( + X: np.ndarray, distributions_pool: List[Type[stats.rv_continuous]] + ) -> Tuple[Type[rv_continuous], Dict[str, float]]: + """ + Fit the data to the best distribution in the pool by minimizing the KL divergence. + + Args: + X (np.ndarray): The data to fit. + distributions_pool (List[Type[stats.rv_continuous]]): The pool of distributions to consider. + + Returns: + Tuple[Optional[Type[stats.rv_continuous]], dict]: The best fitting distribution and its parameters. + """ + best_distribution = None + best_params = None + min_kl_divergence: float = np.inf + + # Compute empirical histogram + hist, bin_edges = np.histogram(X, bins="auto", density=True) + bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2 + + for distribution in distributions_pool: + try: + # Fit the distribution to the data + params = distribution.fit(X) + + # Compute the fitted probability density function + pdf = distribution.pdf(bin_centers, *params) + + # Compute the KL divergence between the empirical and fitted distribution + kl_divergence = np.sum(kl_div(hist, pdf)) + + # Update the best distribution if the current one is better + if kl_divergence < min_kl_divergence: + min_kl_divergence = kl_divergence + best_distribution = distribution + best_params = params + except Exception as e: + # Handle any exceptions that occur during fitting + continue + + return best_distribution, best_params def sample(self, num_samples: int) -> np.ndarray: - pass + """ + Generate samples from the fitted distribution. + + Args: + num_samples (int): The number of samples to generate. + + Returns: + np.ndarray: The generated samples. + + Raises: + ValueError: If no distribution is fitted yet. + """ + if self._distribution is None: + raise ValueError("No distribution fitted yet.") + return self._distribution.rvs(*self._parameters, size=num_samples) + + def __getattr__(self, name: str): + """ + Redirect method calls to the underlying distribution if it exists. + + Args: + name (str): The name of the attribute or method. + + Returns: + Any: The attribute or method from the underlying distribution. + + Raises: + AttributeError: If the attribute or method does not exist. + """ + if self._distribution: + return getattr(self._distribution, name) + raise AttributeError( + f"'{self.__class__.__name__}' object has no attribute '{name}'" + ) + + def __str__(self) -> str: + """ + Return the string representation of the distribution. - def __str__(self): - return str(self._distribution.name) + " continuous distribution" + Returns: + str: The name of the fitted distribution or a message indicating no distribution is fitted. 
+ """ + if self._distribution: + return f"{self._distribution.name} continuous distribution" + return "No distribution fitted yet" diff --git a/bamt/core/nodes/__init__.py b/bamt/core/nodes/__init__.py index e69de29..8fe9434 100644 --- a/bamt/core/nodes/__init__.py +++ b/bamt/core/nodes/__init__.py @@ -0,0 +1,2 @@ +from child_nodes import ConditionalDiscreteNode, ConditionalContinuousNode +from root_nodes import DiscreteNode, ContinuousNode diff --git a/bamt/core/nodes/child_nodes/__init__.py b/bamt/core/nodes/child_nodes/__init__.py index e69de29..ce86f4c 100644 --- a/bamt/core/nodes/child_nodes/__init__.py +++ b/bamt/core/nodes/child_nodes/__init__.py @@ -0,0 +1,2 @@ +from conditional_discrete_node import ConditionalDiscreteNode +from conditional_continuous_node import ConditionalContinuousNode diff --git a/bamt/core/nodes/child_nodes/child_node.py b/bamt/core/nodes/child_nodes/child_node.py index fc1636d..849e274 100644 --- a/bamt/core/nodes/child_nodes/child_node.py +++ b/bamt/core/nodes/child_nodes/child_node.py @@ -1,6 +1,8 @@ +from abc import ABC + from bamt.core.nodes.node import Node -class ChildNode(Node): +class ChildNode(Node, ABC): def __init__(self): super().__init__() diff --git a/bamt/core/nodes/node.py b/bamt/core/nodes/node.py index 6dc4140..e801837 100644 --- a/bamt/core/nodes/node.py +++ b/bamt/core/nodes/node.py @@ -1,6 +1,22 @@ -from abc import ABC +from abc import ABC, abstractmethod class Node(ABC): def __init__(self): pass + + @abstractmethod + def __str__(self): + pass + + @abstractmethod + def get_children(self): + pass + + @abstractmethod + def get_parents(self): + pass + + @abstractmethod + def fit(self, X): + pass diff --git a/bamt/core/nodes/root_nodes/__init__.py b/bamt/core/nodes/root_nodes/__init__.py index e69de29..5a245cf 100644 --- a/bamt/core/nodes/root_nodes/__init__.py +++ b/bamt/core/nodes/root_nodes/__init__.py @@ -0,0 +1,2 @@ +from continuous_node import ContinuousNode +from discrete_node import DiscreteNode diff --git a/bamt/core/nodes/root_nodes/continuous_node.py b/bamt/core/nodes/root_nodes/continuous_node.py index f725000..2aa9e92 100644 --- a/bamt/core/nodes/root_nodes/continuous_node.py +++ b/bamt/core/nodes/root_nodes/continuous_node.py @@ -1,10 +1,27 @@ +import numpy as np from .root_node import RootNode +from bamt.core.node_models import ContinuousDistribution class ContinuousNode(RootNode): - def __init__(self): + """Class for continuous nodes of the Bayesian network. + Continuous nodes are represented by `scipy.stats` continuous distributions. + These distributions are wrapped in the `ContinuousDistribution` class. + """ + + def __init__(self, distribution: ContinuousDistribution = None): super().__init__() - self._distribution = None + self._distribution = distribution def __str__(self): return "Continuous Node with " + str(self._distribution) + + def fit(self, X): + self._distribution.fit(X) + + def sample(self, num_samples: int) -> np.ndarray: + return self._distribution.sample(num_samples) + + @property + def distribution(self): + return self._distribution diff --git a/bamt/core/nodes/root_nodes/root_node.py b/bamt/core/nodes/root_nodes/root_node.py index faf7ff1..cb26373 100644 --- a/bamt/core/nodes/root_nodes/root_node.py +++ b/bamt/core/nodes/root_nodes/root_node.py @@ -1,6 +1,12 @@ +from abc import ABC + from bamt.core.nodes.node import Node -class RootNode(Node): +class RootNode(Node, ABC): + """Abstract Class based on Node Abstract class for root nodes of the + Bayesian network. 
+    Distributions."""
+
     def __init__(self):
         super().__init__()

From 738ceee27e34a142c486068a8387563e2761e0ea Mon Sep 17 00:00:00 2001
From: jrzkaminski
Date: Fri, 5 Jul 2024 17:58:40 +0300
Subject: [PATCH 11/15] empirical distribution implementation

---
 .../node_models/empirical_distribution.py     | 89 +++++++++++++++++--
 1 file changed, 81 insertions(+), 8 deletions(-)

diff --git a/bamt/core/node_models/empirical_distribution.py b/bamt/core/node_models/empirical_distribution.py
index ec63673..2e0a5d1 100644
--- a/bamt/core/node_models/empirical_distribution.py
+++ b/bamt/core/node_models/empirical_distribution.py
@@ -1,17 +1,90 @@
 import numpy as np
-
-from .distribution import Distribution
+from typing import Optional, Union, List
+from .distribution import Distribution
 
 
+# noinspection PyPep8Naming
 class EmpiricalDistribution(Distribution):
-    def __init__(self):
-        pass
+    """
+    Class for empirical distributions.
+    This class fits an empirical distribution to the provided categorical or discrete data by calculating
+    the probabilities of unique values and allows sampling from it.
+    Usage example:
+    ```python
+    data = ['apple', 'banana', 'apple', 'orange', 'banana', 'banana', 'orange', 'apple']
+    emp_dist = EmpiricalDistribution()
+    emp_dist.fit(data)
+    print(emp_dist)
+    samples = emp_dist.sample(10)
+    print(samples)
+    print(emp_dist.pmf('banana'))
+    ```
+    """
+
+    def __init__(self) -> None:
+        """
+        Initialize the EmpiricalDistribution.
+        """
+        self._values: Optional[np.ndarray] = None
+        self._probabilities: Optional[np.ndarray] = None
+
+    def fit(self, X: Union[np.ndarray, List[Union[str, int]]]) -> None:
+        """
+        Fit the empirical distribution to the categorical or discrete data by
+        calculating the probabilities of unique values.
 
-    def fit(self, X: np.ndarray) -> None:
-        pass
+        Args:
+            X (Union[np.ndarray, List[Union[str, int]]]): The categorical or discrete data to fit.
+        """
+        X = np.asarray(X)
+        unique_values, counts = np.unique(X, return_counts=True)
+        self._values = unique_values
+        self._probabilities = counts / counts.sum()
 
     def sample(self, num_samples: int) -> np.ndarray:
-        pass
+        """
+        Generate samples from the empirical distribution.
+
+        Args:
+            num_samples (int): The number of samples to generate.
+
+        Returns:
+            np.ndarray: The generated samples.
+
+        Raises:
+            ValueError: If no data has been fitted yet.
+        """
+        if self._values is None or self._probabilities is None:
+            raise ValueError("No data fitted yet.")
+        return np.random.choice(
+            self._values, size=num_samples, p=self._probabilities, replace=True
+        )
+
+    def pmf(self, value: Union[str, int]) -> float:
+        """
+        Return the probability mass function (PMF) for a given value.
+
+        Args:
+            value (Union[str, int]): The categorical or discrete value to get the PMF for.
+
+        Returns:
+            float: The PMF of the given value.
+
+        Raises:
+            ValueError: If no data has been fitted yet.
+        """
+        if self._values is None or self._probabilities is None:
+            raise ValueError("No data fitted yet.")
+        idx = np.where(self._values == value)
+        if idx[0].size == 0:
+            return 0.0
+        return self._probabilities[idx][0]
+
+    def __str__(self) -> str:
+        """
+        Return the string representation of the empirical distribution.
 
-    def __str__(self):
+        Returns:
+            str: The name of the distribution.
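+
+        Example (illustrative):
+            >>> print(EmpiricalDistribution())
+            Empirical Distribution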
+ """ return "Empirical Distribution" From f482255de0fafd5d4fa1f3c4cc5e9e5ba3ad0f8a Mon Sep 17 00:00:00 2001 From: jrzkaminski Date: Fri, 5 Jul 2024 18:28:21 +0300 Subject: [PATCH 12/15] some refactoring --- .../node_models/continuous_distribution.py | 26 +++--- bamt/core/nodes/root_nodes/continuous_node.py | 48 ++++++++--- bamt/core/nodes/root_nodes/root_node.py | 80 ++++++++++++++++++- 3 files changed, 129 insertions(+), 25 deletions(-) diff --git a/bamt/core/node_models/continuous_distribution.py b/bamt/core/node_models/continuous_distribution.py index 6a0df6c..3e252f8 100644 --- a/bamt/core/node_models/continuous_distribution.py +++ b/bamt/core/node_models/continuous_distribution.py @@ -61,7 +61,7 @@ def __init__( distribution_model (Optional[Type[stats.rv_continuous]]): A specific `scipy.stats` distribution. **parameters: Parameters for the specified distribution model. """ - self._distribution = distribution_model + self._distribution_model = distribution_model self._parameters = parameters def fit( @@ -81,11 +81,11 @@ def fit( Raises: ValueError: If a custom pool is selected but not provided. """ - if self._distribution is None: + if self._distribution_model is None: pool = self._select_pool(distributions_pool, custom_pool) - self._distribution, self._parameters = self._fit_best_distribution(X, pool) + self._distribution_model, self._parameters = self._fit_best_distribution(X, pool) else: - self._parameters = self._distribution.fit(X) + self._parameters = self._distribution_model.fit(X) @staticmethod def _select_pool( @@ -119,14 +119,14 @@ def _select_pool( @staticmethod def _fit_best_distribution( - X: np.ndarray, distributions_pool: List[Type[stats.rv_continuous]] + X: np.ndarray, distribution_models_pool: List[Type[stats.rv_continuous]] ) -> Tuple[Type[rv_continuous], Dict[str, float]]: """ Fit the data to the best distribution in the pool by minimizing the KL divergence. Args: X (np.ndarray): The data to fit. - distributions_pool (List[Type[stats.rv_continuous]]): The pool of distributions to consider. + distribution_models_pool (List[Type[stats.rv_continuous]]): The pool of distributions to consider. Returns: Tuple[Optional[Type[stats.rv_continuous]], dict]: The best fitting distribution and its parameters. @@ -139,7 +139,7 @@ def _fit_best_distribution( hist, bin_edges = np.histogram(X, bins="auto", density=True) bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2 - for distribution in distributions_pool: + for distribution in distribution_models_pool: try: # Fit the distribution to the data params = distribution.fit(X) @@ -174,9 +174,9 @@ def sample(self, num_samples: int) -> np.ndarray: Raises: ValueError: If no distribution is fitted yet. """ - if self._distribution is None: + if self._distribution_model is None: raise ValueError("No distribution fitted yet.") - return self._distribution.rvs(*self._parameters, size=num_samples) + return self._distribution_model.rvs(*self._parameters, size=num_samples) def __getattr__(self, name: str): """ @@ -191,8 +191,8 @@ def __getattr__(self, name: str): Raises: AttributeError: If the attribute or method does not exist. """ - if self._distribution: - return getattr(self._distribution, name) + if self._distribution_model: + return getattr(self._distribution_model, name) raise AttributeError( f"'{self.__class__.__name__}' object has no attribute '{name}'" ) @@ -204,6 +204,6 @@ def __str__(self) -> str: Returns: str: The name of the fitted distribution or a message indicating no distribution is fitted. 
""" - if self._distribution: - return f"{self._distribution.name} continuous distribution" + if self._distribution_model: + return f"{self._distribution_model.name} continuous distribution" return "No distribution fitted yet" diff --git a/bamt/core/nodes/root_nodes/continuous_node.py b/bamt/core/nodes/root_nodes/continuous_node.py index 2aa9e92..41375b3 100644 --- a/bamt/core/nodes/root_nodes/continuous_node.py +++ b/bamt/core/nodes/root_nodes/continuous_node.py @@ -1,27 +1,53 @@ import numpy as np from .root_node import RootNode from bamt.core.node_models import ContinuousDistribution +from typing import List, Optional class ContinuousNode(RootNode): - """Class for continuous nodes of the Bayesian network. + """ + Class for continuous nodes of the Bayesian network. Continuous nodes are represented by `scipy.stats` continuous distributions. These distributions are wrapped in the `ContinuousDistribution` class. + Example Usage: + + ```python + data = np.random.normal(0, 1, 1000) + dist = ContinuousDistribution() + node = ContinuousNode(distribution=dist) + node.fit(data) + print(node) + samples = node.sample(10) + print(samples) + print(node.get_parents()) + ``` """ - def __init__(self, distribution: ContinuousDistribution = None): - super().__init__() - self._distribution = distribution + def __init__(self, distribution: Optional[ContinuousDistribution] = None): + """ + Initialize the ContinuousNode with an optional ContinuousDistribution. - def __str__(self): - return "Continuous Node with " + str(self._distribution) + Args: + distribution (Optional[ContinuousDistribution]): A ContinuousDistribution object. + """ + super().__init__() + self._distribution = distribution if distribution is not None else ContinuousDistribution() - def fit(self, X): - self._distribution.fit(X) + def __str__(self) -> str: + """ + Return the string representation of the continuous node. - def sample(self, num_samples: int) -> np.ndarray: - return self._distribution.sample(num_samples) + Returns: + str: The string representation of the node. + """ + return "Continuous Node with " + str(self._distribution) @property - def distribution(self): + def distribution(self) -> ContinuousDistribution: + """ + Get the continuous distribution of this node. + + Returns: + ContinuousDistribution: The continuous distribution. + """ return self._distribution diff --git a/bamt/core/nodes/root_nodes/root_node.py b/bamt/core/nodes/root_nodes/root_node.py index cb26373..5da83d3 100644 --- a/bamt/core/nodes/root_nodes/root_node.py +++ b/bamt/core/nodes/root_nodes/root_node.py @@ -1,5 +1,8 @@ from abc import ABC +import numpy as np + +from bamt.core.node_models.distribution import Distribution from bamt.core.nodes.node import Node @@ -7,6 +10,81 @@ class RootNode(Node, ABC): """Abstract Class based on Node Abstract class for root nodes of the Bayesian network. Root nodes are represented by Distributions.""" - def __init__(self): super().__init__() + self._distribution = None + self._children = [] + + def __str__(self): + pass + + def get_children(self) -> list: + """ + Get the children of this node. + + Returns: + List[ContinuousNode]: A list of child nodes. + """ + return self._children + + def get_parents(self) -> str: + """ + Get the parents of this node. Since this is a root node, it cannot have parents. + + Returns: + str: A message indicating that this node is a root node and cannot have parents. + """ + return "This is a root node, thus it cannot have parents." 
+ + def add_child(self, child) -> None: + """ + Add a child to this node. + + Args: + child: The child node to add. + """ + if child not in self._children: + self._children.append(child) + + def add_parent(self, parent) -> str: + """ + Attempt to add a parent to this node. Since this is a root node, it cannot have parents. + + Args: + parent: The parent node to add. + + Returns: + str: A message indicating that this node is a root node and cannot have parents. + """ + raise Exception("A parent cannot be added to a root node") + + def fit(self, X: np.ndarray) -> None: + """ + Fit the distribution to the data. + + Args: + X (np.ndarray): The data to fit. + """ + self._distribution.fit(X) + + def sample(self, num_samples: int) -> np.ndarray: + """ + Generate samples from the fitted distribution. + + Args: + num_samples (int): The number of samples to generate. + + Returns: + np.ndarray: The generated samples. + """ + return self._distribution.sample(num_samples) + + @property + def distribution(self) -> Distribution: + """ + Get the continuous distribution of this node. + + Returns: + Distribution: The desired distribution. + """ + return self._distribution From 855b24579a84bb6ffaf7900a9064adc217bda36c Mon Sep 17 00:00:00 2001 From: jrzkaminski Date: Fri, 5 Jul 2024 18:30:58 +0300 Subject: [PATCH 13/15] formatting --- bamt/core/node_models/continuous_distribution.py | 4 +++- bamt/core/nodes/root_nodes/continuous_node.py | 4 +++- bamt/core/nodes/root_nodes/root_node.py | 1 + 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/bamt/core/node_models/continuous_distribution.py b/bamt/core/node_models/continuous_distribution.py index 3e252f8..eafd37d 100644 --- a/bamt/core/node_models/continuous_distribution.py +++ b/bamt/core/node_models/continuous_distribution.py @@ -83,7 +83,9 @@ def fit( """ if self._distribution_model is None: pool = self._select_pool(distributions_pool, custom_pool) - self._distribution_model, self._parameters = self._fit_best_distribution(X, pool) + self._distribution_model, self._parameters = self._fit_best_distribution( + X, pool + ) else: self._parameters = self._distribution_model.fit(X) diff --git a/bamt/core/nodes/root_nodes/continuous_node.py b/bamt/core/nodes/root_nodes/continuous_node.py index 41375b3..3e776ee 100644 --- a/bamt/core/nodes/root_nodes/continuous_node.py +++ b/bamt/core/nodes/root_nodes/continuous_node.py @@ -31,7 +31,9 @@ def __init__(self, distribution: Optional[ContinuousDistribution] = None): distribution (Optional[ContinuousDistribution]): A ContinuousDistribution object. """ super().__init__() - self._distribution = distribution if distribution is not None else ContinuousDistribution() + self._distribution = ( + distribution if distribution is not None else ContinuousDistribution() + ) def __str__(self) -> str: """ diff --git a/bamt/core/nodes/root_nodes/root_node.py b/bamt/core/nodes/root_nodes/root_node.py index 5da83d3..24f4d4e 100644 --- a/bamt/core/nodes/root_nodes/root_node.py +++ b/bamt/core/nodes/root_nodes/root_node.py @@ -10,6 +10,7 @@ class RootNode(Node, ABC): """Abstract Class based on Node Abstract class for root nodes of the Bayesian network. 
Root nodes are represented by Distributions.""" + def __init__(self): super().__init__() self._distribution = None From 9795fefcab868bdcadfa6dda19bf7966e1b065c0 Mon Sep 17 00:00:00 2001 From: jrzkaminski Date: Fri, 5 Jul 2024 18:34:34 +0300 Subject: [PATCH 14/15] docstring fix --- .../node_models/continuous_distribution.py | 10 ++++------ .../node_models/empirical_distribution.py | 17 ++++++++--------- bamt/core/nodes/root_nodes/continuous_node.py | 19 +++++++++---------- 3 files changed, 21 insertions(+), 25 deletions(-) diff --git a/bamt/core/node_models/continuous_distribution.py b/bamt/core/node_models/continuous_distribution.py index eafd37d..d37c06a 100644 --- a/bamt/core/node_models/continuous_distribution.py +++ b/bamt/core/node_models/continuous_distribution.py @@ -31,12 +31,10 @@ class ContinuousDistribution: however, any custom continuous distribution can be used, as long as it implements `scipy.stats` interface. Example Usage: - ```python - data = np.random.normal(0, 1, 1000) - dist = ContinuousDistribution() - dist.fit(data, distributions_pool=DistributionPool.SMALL) - samples = dist.sample(10) - ``` + >>> data = np.random.normal(0, 1, 1000) + >>> dist = ContinuousDistribution() + >>> dist.fit(data, distributions_pool=DistributionPool.SMALL) + >>> samples = dist.sample(10) """ SMALL_POOL: Tuple[Type[stats.rv_continuous], ...] = ( diff --git a/bamt/core/node_models/empirical_distribution.py b/bamt/core/node_models/empirical_distribution.py index 2e0a5d1..8991b88 100644 --- a/bamt/core/node_models/empirical_distribution.py +++ b/bamt/core/node_models/empirical_distribution.py @@ -10,15 +10,14 @@ class EmpiricalDistribution(Distribution): This class fits an empirical distribution to the provided categorical or discrete data by calculating the probabilities of unique values and allows sampling from it. Usage example: - ```python - data = ['apple', 'banana', 'apple', 'orange', 'banana', 'banana', 'orange', 'apple'] - emp_dist = EmpiricalDistribution() - emp_dist.fit(data) - print(emp_dist) - samples = emp_dist.sample(10) - print(samples) - print(emp_dist.pmf('banana')) - ``` + >>> data = ['apple', 'banana', 'apple', 'orange', 'banana', 'banana', 'orange', 'apple'] + >>> emp_dist = EmpiricalDistribution() + >>> emp_dist.fit(data) + >>> print(emp_dist) + >>> samples = emp_dist.sample(10) + >>> print(samples) + >>> print(emp_dist.pmf('banana')) + """ def __init__(self) -> None: diff --git a/bamt/core/nodes/root_nodes/continuous_node.py b/bamt/core/nodes/root_nodes/continuous_node.py index 3e776ee..82edcf5 100644 --- a/bamt/core/nodes/root_nodes/continuous_node.py +++ b/bamt/core/nodes/root_nodes/continuous_node.py @@ -11,16 +11,15 @@ class ContinuousNode(RootNode): These distributions are wrapped in the `ContinuousDistribution` class. 
Example Usage:
-
-    ```python
-    data = np.random.normal(0, 1, 1000)
-    dist = ContinuousDistribution()
-    node = ContinuousNode(distribution=dist)
-    node.fit(data)
-    print(node)
-    samples = node.sample(10)
-    print(samples)
-    print(node.get_parents())
-    ```
+
+    >>> data = np.random.normal(0, 1, 1000)
+    >>> dist = ContinuousDistribution()
+    >>> node = ContinuousNode(distribution=dist)
+    >>> node.fit(data)
+    >>> print(node)
+    >>> samples = node.sample(10)
+    >>> print(samples)
+    >>> print(node.get_parents())
     """
 
     def __init__(self, distribution: Optional[ContinuousDistribution] = None):

From 3e3405ff997d3a2abf82dbdda0e43acf26e5308e Mon Sep 17 00:00:00 2001
From: jrzkaminski
Date: Fri, 5 Jul 2024 18:39:53 +0300
Subject: [PATCH 15/15] Some refactoring

---
 bamt/core/nodes/root_nodes/continuous_node.py | 10 --------
 bamt/core/nodes/root_nodes/discrete_node.py   | 23 ++++++++++++++++++-
 2 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/bamt/core/nodes/root_nodes/continuous_node.py b/bamt/core/nodes/root_nodes/continuous_node.py
index 82edcf5..9e07fd9 100644
--- a/bamt/core/nodes/root_nodes/continuous_node.py
+++ b/bamt/core/nodes/root_nodes/continuous_node.py
@@ -42,13 +42,3 @@ def __str__(self) -> str:
             str: The string representation of the node.
         """
         return "Continuous Node with " + str(self._distribution)
-
-    @property
-    def distribution(self) -> ContinuousDistribution:
-        """
-        Get the continuous distribution of this node.
-
-        Returns:
-            ContinuousDistribution: The continuous distribution.
-        """
-        return self._distribution
diff --git a/bamt/core/nodes/root_nodes/discrete_node.py b/bamt/core/nodes/root_nodes/discrete_node.py
index d9b04c8..fca7ccf 100644
--- a/bamt/core/nodes/root_nodes/discrete_node.py
+++ b/bamt/core/nodes/root_nodes/discrete_node.py
@@ -1,6 +1,27 @@
+from typing import Optional
+
 from .root_node import RootNode
+from bamt.core.node_models import EmpiricalDistribution
 
 
 class DiscreteNode(RootNode):
-    def __init__(self):
+    def __init__(self, distribution: Optional[EmpiricalDistribution] = None):
+        """
+        Initialize the DiscreteNode with an optional EmpiricalDistribution.
+
+        Args:
+            distribution (Optional[EmpiricalDistribution]): An EmpiricalDistribution object.
+        """
         super().__init__()
+        self._distribution = (
+            distribution if distribution is not None else EmpiricalDistribution()
+        )
+
+    def __str__(self) -> str:
+        """
+        Return the string representation of the discrete node.
+
+        Returns:
+            str: The string representation of the node.
+        """
+        return "Discrete Node with " + str(self._distribution)
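+
+
+# Illustrative usage sketch (comment only, not executed; assumes the classes
+# defined above: `fit` and `sample` are inherited from RootNode and delegate
+# to the node's EmpiricalDistribution):
+#
+#   node = DiscreteNode()
+#   node.fit(["rain", "sun", "rain", "fog"])
+#   print(node)            # Discrete Node with Empirical Distribution
+#   print(node.sample(3))  # e.g. ['rain' 'sun' 'rain']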