diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 62ef879f..141e249c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -17,6 +17,8 @@ repos: hooks: - id: mypy args: [--ignore-missing-imports] + additional_dependencies: + - pydantic>=1.10.4 - repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks rev: v2.12.0 hooks: @@ -54,6 +56,8 @@ repos: rev: v2.3.0 hooks: - id: codespell + additional_dependencies: + - tomli - repo: https://github.com/hija/clean-dotenv rev: v0.0.7 diff --git a/poetry.lock b/poetry.lock index 563e3712..e57b733a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -365,13 +365,13 @@ numpy = "*" [[package]] name = "certifi" -version = "2024.2.2" +version = "2024.6.2" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.6" files = [ - {file = "certifi-2024.2.2-py3-none-any.whl", hash = "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1"}, - {file = "certifi-2024.2.2.tar.gz", hash = "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f"}, + {file = "certifi-2024.6.2-py3-none-any.whl", hash = "sha256:ddc6c8ce995e6987e7faf5e3f1b02b302836a0e5d98ece18392cb1a36c72ad56"}, + {file = "certifi-2024.6.2.tar.gz", hash = "sha256:3cd43f1c6fa7dedc5899d69d3ad0398fd018ad1a17fba83ddaf78aa46c747516"}, ] [[package]] @@ -703,13 +703,13 @@ toml = ["tomli"] [[package]] name = "datasets" -version = "2.19.1" +version = "2.19.2" description = "HuggingFace community-driven open-source library of datasets" optional = false python-versions = ">=3.8.0" files = [ - {file = "datasets-2.19.1-py3-none-any.whl", hash = "sha256:f7a78d15896f45004ccac1c298f3c7121f92f91f6f2bfbd4e4f210f827e6e411"}, - {file = "datasets-2.19.1.tar.gz", hash = "sha256:0df9ef6c5e9138cdb996a07385220109ff203c204245578b69cca905eb151d3a"}, + {file = "datasets-2.19.2-py3-none-any.whl", hash = "sha256:e07ff15d75b1af75c87dd96323ba2a361128d495136652f37fd62f918d17bb4e"}, + 
{file = "datasets-2.19.2.tar.gz", hash = "sha256:eccb82fb3bb5ee26ccc6d7a15b7f1f834e2cc4e59b7cff7733a003552bad51ef"}, ] [package.dependencies] @@ -725,7 +725,7 @@ pandas = "*" pyarrow = ">=12.0.0" pyarrow-hotfix = "*" pyyaml = ">=5.1" -requests = ">=2.19.0" +requests = ">=2.32.1" tqdm = ">=4.62.1" xxhash = "*" @@ -733,7 +733,7 @@ xxhash = "*" apache-beam = ["apache-beam (>=2.26.0)"] audio = ["librosa", "soundfile (>=0.12.1)"] benchmarks = ["tensorflow (==2.12.0)", "torch (==2.0.1)", "transformers (==4.30.1)"] -dev = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.3.0)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.6.0)", "tiktoken", "torch", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"] +dev = ["Pillow (>=9.4.0)", "absl-py", "apache-beam (>=2.26.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.3.0)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.6.0)", "tiktoken", "torch", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"] docs = ["s3fs", "tensorflow (>=2.6.0)", "torch", "transformers"] jax = ["jax (>=0.3.14)", "jaxlib (>=0.3.14)"] metrics-tests = ["Werkzeug (>=1.0.1)", "accelerate", "bert-score (>=0.3.6)", "jiwer", "langdetect", "mauve-text", "nltk", "requests-file (>=1.5.1)", "rouge-score", "sacrebleu", "sacremoses", "scikit-learn", "scipy", "sentencepiece", "seqeval", "six 
(>=1.15.0,<1.16.0)", "spacy (>=3.0.0)", "texttable (>=1.6.3)", "tldextract", "tldextract (>=3.1.0)", "toml (>=0.10.1)", "typer (<0.5.0)"] @@ -741,9 +741,9 @@ quality = ["ruff (>=0.3.0)"] s3 = ["s3fs"] tensorflow = ["tensorflow (>=2.6.0)"] tensorflow-gpu = ["tensorflow (>=2.6.0)"] -tests = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.6.0)", "tiktoken", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"] +tests = ["Pillow (>=9.4.0)", "absl-py", "apache-beam (>=2.26.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.6.0)", "tiktoken", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"] torch = ["torch"] -vision = ["Pillow (>=6.2.1)"] +vision = ["Pillow (>=9.4.0)"] [[package]] name = "debugpy" @@ -880,20 +880,20 @@ dev = ["Sphinx (==2.1.0)", "future (==0.17.1)", "numpy (==1.16.4)", "pytest (==4 [[package]] name = "fileformats" -version = "0.11.2" +version = "0.11.3" description = "Classes for representing different file formats in Python classes for use in type hinting in data workflows" optional = false python-versions = ">=3.8" files = [ - {file = "fileformats-0.11.2-py3-none-any.whl", hash = "sha256:12a9c04e251e741d4b5ce0323a8723546e9d0dc1176fe724d44f0f4e80118d82"}, - {file = "fileformats-0.11.2.tar.gz", hash = 
"sha256:97beaa64e658b139110994bc9119522208f72780d65233e640536fa56942da0c"}, + {file = "fileformats-0.11.3-py3-none-any.whl", hash = "sha256:460a56344addb30b82fa9fe134cc1552aac7eeb76d6dcaf0ecea77d76095b361"}, + {file = "fileformats-0.11.3.tar.gz", hash = "sha256:c1bbc82d4039e6dc5f0e5063fa18afe516e5bc2312a24f9aa199ef8ccfc355ae"}, ] [package.dependencies] typing-extensions = {version = ">=4.6.3", markers = "python_version < \"3.11\""} [package.extras] -dev = ["black", "codespell", "fileformats[test]", "flake8", "flake8-pyproject", "pre-commit", "pydata-sphinx-theme (>=0.13)"] +dev = ["black", "codespell", "flake8", "flake8-pyproject", "pre-commit", "pydata-sphinx-theme (>=0.13)", "pydra (>=0.23.0a0)", "pytest (>=6.2.5)", "pytest-cov (>=2.12.1)", "pytest-env (>=0.6.2)"] docs = ["docutils (>=0.10)", "furo (>=2022.2.14.1)", "mock (>1.0)", "numpydoc (>=0.6.0)", "packaging", "sphinx (>=2.1.2)", "sphinx-argparse (>=0.2.0)", "sphinx-click (>=3.1)"] test = ["pydra (>=0.23.0a0)", "pytest (>=6.2.5)", "pytest-cov (>=2.12.1)", "pytest-env (>=0.6.2)"] @@ -1050,13 +1050,13 @@ files = [ [[package]] name = "huggingface-hub" -version = "0.23.2" +version = "0.23.3" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" optional = false python-versions = ">=3.8.0" files = [ - {file = "huggingface_hub-0.23.2-py3-none-any.whl", hash = "sha256:48727a16e704d409c4bb5913613308499664f22a99743435dc3a13b23c485827"}, - {file = "huggingface_hub-0.23.2.tar.gz", hash = "sha256:f6829b62d5fdecb452a76fdbec620cba4c1573655a8d710c1df71735fd9edbd2"}, + {file = "huggingface_hub-0.23.3-py3-none-any.whl", hash = "sha256:22222c41223f1b7c209ae5511d2d82907325a0e3cdbce5f66949d43c598ff3bc"}, + {file = "huggingface_hub-0.23.3.tar.gz", hash = "sha256:1a1118a0b3dea3bab6c325d71be16f5ffe441d32f3ac7c348d6875911b694b5b"}, ] [package.dependencies] @@ -1082,6 +1082,21 @@ testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gr torch = 
["safetensors", "torch"] typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)"] +[[package]] +name = "hyperpyyaml" +version = "1.2.2" +description = "Extensions to YAML syntax for better python interaction" +optional = false +python-versions = "*" +files = [ + {file = "HyperPyYAML-1.2.2-py3-none-any.whl", hash = "sha256:3c5864bdc8864b2f0fbd7bc495e7e8fdf2dfd5dd80116f72da27ca96a128bdeb"}, + {file = "HyperPyYAML-1.2.2.tar.gz", hash = "sha256:bdb734210d18770a262f500fe5755c7a44a5d3b91521b06e24f7a00a36ee0f87"}, +] + +[package.dependencies] +pyyaml = ">=5.1" +"ruamel.yaml" = ">=0.17.28" + [[package]] name = "identify" version = "2.5.36" @@ -1187,13 +1202,13 @@ test = ["flaky", "ipyparallel", "pre-commit", "pytest (>=7.0)", "pytest-asyncio [[package]] name = "ipython" -version = "8.24.0" +version = "8.25.0" description = "IPython: Productive Interactive Computing" optional = false python-versions = ">=3.10" files = [ - {file = "ipython-8.24.0-py3-none-any.whl", hash = "sha256:d7bf2f6c4314984e3e02393213bab8703cf163ede39672ce5918c51fe253a2a3"}, - {file = "ipython-8.24.0.tar.gz", hash = "sha256:010db3f8a728a578bb641fdd06c063b9fb8e96a9464c63aec6310fbcb5e80501"}, + {file = "ipython-8.25.0-py3-none-any.whl", hash = "sha256:53eee7ad44df903a06655871cbab66d156a051fd86f3ec6750470ac9604ac1ab"}, + {file = "ipython-8.25.0.tar.gz", hash = "sha256:c6ed726a140b6e725b911528f80439c534fac915246af3efc39440a6b0f9d716"}, ] [package.dependencies] @@ -1212,7 +1227,7 @@ typing-extensions = {version = ">=4.6", markers = "python_version < \"3.12\""} [package.extras] all = ["ipython[black,doc,kernel,matplotlib,nbconvert,nbformat,notebook,parallel,qtconsole]", "ipython[test,test-extra]"] black = ["black"] -doc = ["docrepr", "exceptiongroup", "ipykernel", "ipython[test]", "matplotlib", "setuptools (>=18.5)", "sphinx (>=1.3)", "sphinx-rtd-theme", "sphinxcontrib-jquery", "stack-data", "typing-extensions"] +doc = 
["docrepr", "exceptiongroup", "intersphinx-registry", "ipykernel", "ipython[test]", "matplotlib", "setuptools (>=18.5)", "sphinx (>=1.3)", "sphinx-rtd-theme", "sphinxcontrib-jquery", "tomli", "typing-extensions"] kernel = ["ipykernel"] matplotlib = ["matplotlib"] nbconvert = ["nbconvert"] @@ -1285,6 +1300,21 @@ MarkupSafe = ">=2.0" [package.extras] i18n = ["Babel (>=2.7)"] +[[package]] +name = "jiwer" +version = "3.0.4" +description = "Evaluate your speech-to-text system with similarity measures such as word error rate (WER)" +optional = false +python-versions = "<4.0,>=3.7" +files = [ + {file = "jiwer-3.0.4-py3-none-any.whl", hash = "sha256:d6761a1cb7c5a8e3f4bafa96cf4b4f125e2ccc82b625f74dd23557414e97f86f"}, + {file = "jiwer-3.0.4.tar.gz", hash = "sha256:2438acdc7ca22128fcab4be60db595809d2b5e73785b736de36dc3281a2a6ae8"}, +] + +[package.dependencies] +click = ">=8.1.3,<9.0.0" +rapidfuzz = ">=3,<4" + [[package]] name = "joblib" version = "1.4.2" @@ -1846,18 +1876,15 @@ test = ["pytest (>=7.2)", "pytest-cov (>=4.0)"] [[package]] name = "nodeenv" -version = "1.8.0" +version = "1.9.1" description = "Node.js virtual environment builder" optional = false -python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*" +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" files = [ - {file = "nodeenv-1.8.0-py2.py3-none-any.whl", hash = "sha256:df865724bb3c3adc86b3876fa209771517b0cfe596beff01a92700e0e8be4cec"}, - {file = "nodeenv-1.8.0.tar.gz", hash = "sha256:d51e0c37e64fbf47d017feac3145cdbb58836d7eee8c6f6d3b6880c5456227d2"}, + {file = "nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9"}, + {file = "nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f"}, ] -[package.dependencies] -setuptools = "*" - [[package]] name = "numba" version = "0.59.1" @@ -2506,13 +2533,13 @@ files = [ [[package]] name = "prompt-toolkit" 
-version = "3.0.45" +version = "3.0.46" description = "Library for building powerful interactive command lines in Python" optional = false python-versions = ">=3.7.0" files = [ - {file = "prompt_toolkit-3.0.45-py3-none-any.whl", hash = "sha256:a29b89160e494e3ea8622b09fa5897610b437884dcdcd054fdc1308883326c2a"}, - {file = "prompt_toolkit-3.0.45.tar.gz", hash = "sha256:07c60ee4ab7b7e90824b61afa840c8f5aad2d46b3e2e10acc33d8ecc94a49089"}, + {file = "prompt_toolkit-3.0.46-py3-none-any.whl", hash = "sha256:45abe60a8300f3c618b23c16c4bb98c6fc80af8ce8b17c7ae92db48db3ee63c1"}, + {file = "prompt_toolkit-3.0.46.tar.gz", hash = "sha256:869c50d682152336e23c4db7f74667639b5047494202ffe7670817053fd57795"}, ] [package.dependencies] @@ -2643,18 +2670,18 @@ files = [ [[package]] name = "pydantic" -version = "2.7.2" +version = "2.7.3" description = "Data validation using Python type hints" optional = false python-versions = ">=3.8" files = [ - {file = "pydantic-2.7.2-py3-none-any.whl", hash = "sha256:834ab954175f94e6e68258537dc49402c4a5e9d0409b9f1b86b7e934a8372de7"}, - {file = "pydantic-2.7.2.tar.gz", hash = "sha256:71b2945998f9c9b7919a45bde9a50397b289937d215ae141c1d0903ba7149fd7"}, + {file = "pydantic-2.7.3-py3-none-any.whl", hash = "sha256:ea91b002777bf643bb20dd717c028ec43216b24a6001a280f83877fd2655d0b4"}, + {file = "pydantic-2.7.3.tar.gz", hash = "sha256:c46c76a40bb1296728d7a8b99aa73dd70a48c3510111ff290034f860c99c419e"}, ] [package.dependencies] annotated-types = ">=0.4.0" -pydantic-core = "2.18.3" +pydantic-core = "2.18.4" typing-extensions = ">=4.6.1" [package.extras] @@ -2662,90 +2689,90 @@ email = ["email-validator (>=2.0.0)"] [[package]] name = "pydantic-core" -version = "2.18.3" +version = "2.18.4" description = "Core functionality for Pydantic validation and serialization" optional = false python-versions = ">=3.8" files = [ - {file = "pydantic_core-2.18.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:744697428fcdec6be5670460b578161d1ffe34743a5c15656be7ea82b008197c"}, - 
{file = "pydantic_core-2.18.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:37b40c05ced1ba4218b14986fe6f283d22e1ae2ff4c8e28881a70fb81fbfcda7"}, - {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:544a9a75622357076efb6b311983ff190fbfb3c12fc3a853122b34d3d358126c"}, - {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e2e253af04ceaebde8eb201eb3f3e3e7e390f2d275a88300d6a1959d710539e2"}, - {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:855ec66589c68aa367d989da5c4755bb74ee92ccad4fdb6af942c3612c067e34"}, - {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3d3e42bb54e7e9d72c13ce112e02eb1b3b55681ee948d748842171201a03a98a"}, - {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c6ac9ffccc9d2e69d9fba841441d4259cb668ac180e51b30d3632cd7abca2b9b"}, - {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c56eca1686539fa0c9bda992e7bd6a37583f20083c37590413381acfc5f192d6"}, - {file = "pydantic_core-2.18.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:17954d784bf8abfc0ec2a633108207ebc4fa2df1a0e4c0c3ccbaa9bb01d2c426"}, - {file = "pydantic_core-2.18.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:98ed737567d8f2ecd54f7c8d4f8572ca7c7921ede93a2e52939416170d357812"}, - {file = "pydantic_core-2.18.3-cp310-none-win32.whl", hash = "sha256:9f9e04afebd3ed8c15d67a564ed0a34b54e52136c6d40d14c5547b238390e779"}, - {file = "pydantic_core-2.18.3-cp310-none-win_amd64.whl", hash = "sha256:45e4ffbae34f7ae30d0047697e724e534a7ec0a82ef9994b7913a412c21462a0"}, - {file = "pydantic_core-2.18.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:b9ebe8231726c49518b16b237b9fe0d7d361dd221302af511a83d4ada01183ab"}, - {file = 
"pydantic_core-2.18.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b8e20e15d18bf7dbb453be78a2d858f946f5cdf06c5072453dace00ab652e2b2"}, - {file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c0d9ff283cd3459fa0bf9b0256a2b6f01ac1ff9ffb034e24457b9035f75587cb"}, - {file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2f7ef5f0ebb77ba24c9970da18b771711edc5feaf00c10b18461e0f5f5949231"}, - {file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:73038d66614d2e5cde30435b5afdced2b473b4c77d4ca3a8624dd3e41a9c19be"}, - {file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6afd5c867a74c4d314c557b5ea9520183fadfbd1df4c2d6e09fd0d990ce412cd"}, - {file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bd7df92f28d351bb9f12470f4c533cf03d1b52ec5a6e5c58c65b183055a60106"}, - {file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:80aea0ffeb1049336043d07799eace1c9602519fb3192916ff525b0287b2b1e4"}, - {file = "pydantic_core-2.18.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:aaee40f25bba38132e655ffa3d1998a6d576ba7cf81deff8bfa189fb43fd2bbe"}, - {file = "pydantic_core-2.18.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9128089da8f4fe73f7a91973895ebf2502539d627891a14034e45fb9e707e26d"}, - {file = "pydantic_core-2.18.3-cp311-none-win32.whl", hash = "sha256:fec02527e1e03257aa25b1a4dcbe697b40a22f1229f5d026503e8b7ff6d2eda7"}, - {file = "pydantic_core-2.18.3-cp311-none-win_amd64.whl", hash = "sha256:58ff8631dbab6c7c982e6425da8347108449321f61fe427c52ddfadd66642af7"}, - {file = "pydantic_core-2.18.3-cp311-none-win_arm64.whl", hash = "sha256:3fc1c7f67f34c6c2ef9c213e0f2a351797cda98249d9ca56a70ce4ebcaba45f4"}, - {file = "pydantic_core-2.18.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = 
"sha256:f0928cde2ae416a2d1ebe6dee324709c6f73e93494d8c7aea92df99aab1fc40f"}, - {file = "pydantic_core-2.18.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0bee9bb305a562f8b9271855afb6ce00223f545de3d68560b3c1649c7c5295e9"}, - {file = "pydantic_core-2.18.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e862823be114387257dacbfa7d78547165a85d7add33b446ca4f4fae92c7ff5c"}, - {file = "pydantic_core-2.18.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6a36f78674cbddc165abab0df961b5f96b14461d05feec5e1f78da58808b97e7"}, - {file = "pydantic_core-2.18.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ba905d184f62e7ddbb7a5a751d8a5c805463511c7b08d1aca4a3e8c11f2e5048"}, - {file = "pydantic_core-2.18.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7fdd362f6a586e681ff86550b2379e532fee63c52def1c666887956748eaa326"}, - {file = "pydantic_core-2.18.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24b214b7ee3bd3b865e963dbed0f8bc5375f49449d70e8d407b567af3222aae4"}, - {file = "pydantic_core-2.18.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:691018785779766127f531674fa82bb368df5b36b461622b12e176c18e119022"}, - {file = "pydantic_core-2.18.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:60e4c625e6f7155d7d0dcac151edf5858102bc61bf959d04469ca6ee4e8381bd"}, - {file = "pydantic_core-2.18.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a4e651e47d981c1b701dcc74ab8fec5a60a5b004650416b4abbef13db23bc7be"}, - {file = "pydantic_core-2.18.3-cp312-none-win32.whl", hash = "sha256:ffecbb5edb7f5ffae13599aec33b735e9e4c7676ca1633c60f2c606beb17efc5"}, - {file = "pydantic_core-2.18.3-cp312-none-win_amd64.whl", hash = "sha256:2c8333f6e934733483c7eddffdb094c143b9463d2af7e6bd85ebcb2d4a1b82c6"}, - {file = "pydantic_core-2.18.3-cp312-none-win_arm64.whl", hash = 
"sha256:7a20dded653e516a4655f4c98e97ccafb13753987434fe7cf044aa25f5b7d417"}, - {file = "pydantic_core-2.18.3-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:eecf63195be644b0396f972c82598cd15693550f0ff236dcf7ab92e2eb6d3522"}, - {file = "pydantic_core-2.18.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2c44efdd3b6125419c28821590d7ec891c9cb0dff33a7a78d9d5c8b6f66b9702"}, - {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6e59fca51ffbdd1638b3856779342ed69bcecb8484c1d4b8bdb237d0eb5a45e2"}, - {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:70cf099197d6b98953468461d753563b28e73cf1eade2ffe069675d2657ed1d5"}, - {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:63081a49dddc6124754b32a3774331467bfc3d2bd5ff8f10df36a95602560361"}, - {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:370059b7883485c9edb9655355ff46d912f4b03b009d929220d9294c7fd9fd60"}, - {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a64faeedfd8254f05f5cf6fc755023a7e1606af3959cfc1a9285744cc711044"}, - {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:19d2e725de0f90d8671f89e420d36c3dd97639b98145e42fcc0e1f6d492a46dc"}, - {file = "pydantic_core-2.18.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:67bc078025d70ec5aefe6200ef094576c9d86bd36982df1301c758a9fff7d7f4"}, - {file = "pydantic_core-2.18.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:adf952c3f4100e203cbaf8e0c907c835d3e28f9041474e52b651761dc248a3c0"}, - {file = "pydantic_core-2.18.3-cp38-none-win32.whl", hash = "sha256:9a46795b1f3beb167eaee91736d5d17ac3a994bf2215a996aed825a45f897558"}, - {file = "pydantic_core-2.18.3-cp38-none-win_amd64.whl", hash = "sha256:200ad4e3133cb99ed82342a101a5abf3d924722e71cd581cc113fe828f727fbc"}, - 
{file = "pydantic_core-2.18.3-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:304378b7bf92206036c8ddd83a2ba7b7d1a5b425acafff637172a3aa72ad7083"}, - {file = "pydantic_core-2.18.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c826870b277143e701c9ccf34ebc33ddb4d072612683a044e7cce2d52f6c3fef"}, - {file = "pydantic_core-2.18.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e201935d282707394f3668380e41ccf25b5794d1b131cdd96b07f615a33ca4b1"}, - {file = "pydantic_core-2.18.3-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5560dda746c44b48bf82b3d191d74fe8efc5686a9ef18e69bdabccbbb9ad9442"}, - {file = "pydantic_core-2.18.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6b32c2a1f8032570842257e4c19288eba9a2bba4712af542327de9a1204faff8"}, - {file = "pydantic_core-2.18.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:929c24e9dea3990bc8bcd27c5f2d3916c0c86f5511d2caa69e0d5290115344a9"}, - {file = "pydantic_core-2.18.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1a8376fef60790152564b0eab376b3e23dd6e54f29d84aad46f7b264ecca943"}, - {file = "pydantic_core-2.18.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dccf3ef1400390ddd1fb55bf0632209d39140552d068ee5ac45553b556780e06"}, - {file = "pydantic_core-2.18.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:41dbdcb0c7252b58fa931fec47937edb422c9cb22528f41cb8963665c372caf6"}, - {file = "pydantic_core-2.18.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:666e45cf071669fde468886654742fa10b0e74cd0fa0430a46ba6056b24fb0af"}, - {file = "pydantic_core-2.18.3-cp39-none-win32.whl", hash = "sha256:f9c08cabff68704a1b4667d33f534d544b8a07b8e5d039c37067fceb18789e78"}, - {file = "pydantic_core-2.18.3-cp39-none-win_amd64.whl", hash = "sha256:4afa5f5973e8572b5c0dcb4e2d4fda7890e7cd63329bd5cc3263a25c92ef0026"}, - {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", 
hash = "sha256:77319771a026f7c7d29c6ebc623de889e9563b7087911b46fd06c044a12aa5e9"}, - {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:df11fa992e9f576473038510d66dd305bcd51d7dd508c163a8c8fe148454e059"}, - {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d531076bdfb65af593326ffd567e6ab3da145020dafb9187a1d131064a55f97c"}, - {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d33ce258e4e6e6038f2b9e8b8a631d17d017567db43483314993b3ca345dcbbb"}, - {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1f9cd7f5635b719939019be9bda47ecb56e165e51dd26c9a217a433e3d0d59a9"}, - {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:cd4a032bb65cc132cae1fe3e52877daecc2097965cd3914e44fbd12b00dae7c5"}, - {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:82f2718430098bcdf60402136c845e4126a189959d103900ebabb6774a5d9fdb"}, - {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:c0037a92cf0c580ed14e10953cdd26528e8796307bb8bb312dc65f71547df04d"}, - {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b95a0972fac2b1ff3c94629fc9081b16371dad870959f1408cc33b2f78ad347a"}, - {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:a62e437d687cc148381bdd5f51e3e81f5b20a735c55f690c5be94e05da2b0d5c"}, - {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b367a73a414bbb08507da102dc2cde0fa7afe57d09b3240ce82a16d608a7679c"}, - {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ecce4b2360aa3f008da3327d652e74a0e743908eac306198b47e1c58b03dd2b"}, - {file = 
"pydantic_core-2.18.3-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bd4435b8d83f0c9561a2a9585b1de78f1abb17cb0cef5f39bf6a4b47d19bafe3"}, - {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:616221a6d473c5b9aa83fa8982745441f6a4a62a66436be9445c65f241b86c94"}, - {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:7e6382ce89a92bc1d0c0c5edd51e931432202b9080dc921d8d003e616402efd1"}, - {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:ff58f379345603d940e461eae474b6bbb6dab66ed9a851ecd3cb3709bf4dcf6a"}, - {file = "pydantic_core-2.18.3.tar.gz", hash = "sha256:432e999088d85c8f36b9a3f769a8e2b57aabd817bbb729a90d1fe7f18f6f1f39"}, + {file = "pydantic_core-2.18.4-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:f76d0ad001edd426b92233d45c746fd08f467d56100fd8f30e9ace4b005266e4"}, + {file = "pydantic_core-2.18.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:59ff3e89f4eaf14050c8022011862df275b552caef8082e37b542b066ce1ff26"}, + {file = "pydantic_core-2.18.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a55b5b16c839df1070bc113c1f7f94a0af4433fcfa1b41799ce7606e5c79ce0a"}, + {file = "pydantic_core-2.18.4-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4d0dcc59664fcb8974b356fe0a18a672d6d7cf9f54746c05f43275fc48636851"}, + {file = "pydantic_core-2.18.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8951eee36c57cd128f779e641e21eb40bc5073eb28b2d23f33eb0ef14ffb3f5d"}, + {file = "pydantic_core-2.18.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4701b19f7e3a06ea655513f7938de6f108123bf7c86bbebb1196eb9bd35cf724"}, + {file = "pydantic_core-2.18.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e00a3f196329e08e43d99b79b286d60ce46bed10f2280d25a1718399457e06be"}, + {file = 
"pydantic_core-2.18.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:97736815b9cc893b2b7f663628e63f436018b75f44854c8027040e05230eeddb"}, + {file = "pydantic_core-2.18.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:6891a2ae0e8692679c07728819b6e2b822fb30ca7445f67bbf6509b25a96332c"}, + {file = "pydantic_core-2.18.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bc4ff9805858bd54d1a20efff925ccd89c9d2e7cf4986144b30802bf78091c3e"}, + {file = "pydantic_core-2.18.4-cp310-none-win32.whl", hash = "sha256:1b4de2e51bbcb61fdebd0ab86ef28062704f62c82bbf4addc4e37fa4b00b7cbc"}, + {file = "pydantic_core-2.18.4-cp310-none-win_amd64.whl", hash = "sha256:6a750aec7bf431517a9fd78cb93c97b9b0c496090fee84a47a0d23668976b4b0"}, + {file = "pydantic_core-2.18.4-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:942ba11e7dfb66dc70f9ae66b33452f51ac7bb90676da39a7345e99ffb55402d"}, + {file = "pydantic_core-2.18.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b2ebef0e0b4454320274f5e83a41844c63438fdc874ea40a8b5b4ecb7693f1c4"}, + {file = "pydantic_core-2.18.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a642295cd0c8df1b86fc3dced1d067874c353a188dc8e0f744626d49e9aa51c4"}, + {file = "pydantic_core-2.18.4-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5f09baa656c904807e832cf9cce799c6460c450c4ad80803517032da0cd062e2"}, + {file = "pydantic_core-2.18.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:98906207f29bc2c459ff64fa007afd10a8c8ac080f7e4d5beff4c97086a3dabd"}, + {file = "pydantic_core-2.18.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:19894b95aacfa98e7cb093cd7881a0c76f55731efad31073db4521e2b6ff5b7d"}, + {file = "pydantic_core-2.18.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0fbbdc827fe5e42e4d196c746b890b3d72876bdbf160b0eafe9f0334525119c8"}, + {file = 
"pydantic_core-2.18.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f85d05aa0918283cf29a30b547b4df2fbb56b45b135f9e35b6807cb28bc47951"}, + {file = "pydantic_core-2.18.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e85637bc8fe81ddb73fda9e56bab24560bdddfa98aa64f87aaa4e4b6730c23d2"}, + {file = "pydantic_core-2.18.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:2f5966897e5461f818e136b8451d0551a2e77259eb0f73a837027b47dc95dab9"}, + {file = "pydantic_core-2.18.4-cp311-none-win32.whl", hash = "sha256:44c7486a4228413c317952e9d89598bcdfb06399735e49e0f8df643e1ccd0558"}, + {file = "pydantic_core-2.18.4-cp311-none-win_amd64.whl", hash = "sha256:8a7164fe2005d03c64fd3b85649891cd4953a8de53107940bf272500ba8a788b"}, + {file = "pydantic_core-2.18.4-cp311-none-win_arm64.whl", hash = "sha256:4e99bc050fe65c450344421017f98298a97cefc18c53bb2f7b3531eb39bc7805"}, + {file = "pydantic_core-2.18.4-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:6f5c4d41b2771c730ea1c34e458e781b18cc668d194958e0112455fff4e402b2"}, + {file = "pydantic_core-2.18.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2fdf2156aa3d017fddf8aea5adfba9f777db1d6022d392b682d2a8329e087cef"}, + {file = "pydantic_core-2.18.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4748321b5078216070b151d5271ef3e7cc905ab170bbfd27d5c83ee3ec436695"}, + {file = "pydantic_core-2.18.4-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:847a35c4d58721c5dc3dba599878ebbdfd96784f3fb8bb2c356e123bdcd73f34"}, + {file = "pydantic_core-2.18.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3c40d4eaad41f78e3bbda31b89edc46a3f3dc6e171bf0ecf097ff7a0ffff7cb1"}, + {file = "pydantic_core-2.18.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:21a5e440dbe315ab9825fcd459b8814bb92b27c974cbc23c3e8baa2b76890077"}, + {file = "pydantic_core-2.18.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:01dd777215e2aa86dfd664daed5957704b769e726626393438f9c87690ce78c3"}, + {file = "pydantic_core-2.18.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4b06beb3b3f1479d32befd1f3079cc47b34fa2da62457cdf6c963393340b56e9"}, + {file = "pydantic_core-2.18.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:564d7922e4b13a16b98772441879fcdcbe82ff50daa622d681dd682175ea918c"}, + {file = "pydantic_core-2.18.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:0eb2a4f660fcd8e2b1c90ad566db2b98d7f3f4717c64fe0a83e0adb39766d5b8"}, + {file = "pydantic_core-2.18.4-cp312-none-win32.whl", hash = "sha256:8b8bab4c97248095ae0c4455b5a1cd1cdd96e4e4769306ab19dda135ea4cdb07"}, + {file = "pydantic_core-2.18.4-cp312-none-win_amd64.whl", hash = "sha256:14601cdb733d741b8958224030e2bfe21a4a881fb3dd6fbb21f071cabd48fa0a"}, + {file = "pydantic_core-2.18.4-cp312-none-win_arm64.whl", hash = "sha256:c1322d7dd74713dcc157a2b7898a564ab091ca6c58302d5c7b4c07296e3fd00f"}, + {file = "pydantic_core-2.18.4-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:823be1deb01793da05ecb0484d6c9e20baebb39bd42b5d72636ae9cf8350dbd2"}, + {file = "pydantic_core-2.18.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ebef0dd9bf9b812bf75bda96743f2a6c5734a02092ae7f721c048d156d5fabae"}, + {file = "pydantic_core-2.18.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ae1d6df168efb88d7d522664693607b80b4080be6750c913eefb77e34c12c71a"}, + {file = "pydantic_core-2.18.4-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f9899c94762343f2cc2fc64c13e7cae4c3cc65cdfc87dd810a31654c9b7358cc"}, + {file = "pydantic_core-2.18.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:99457f184ad90235cfe8461c4d70ab7dd2680e28821c29eca00252ba90308c78"}, + {file = "pydantic_core-2.18.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:18f469a3d2a2fdafe99296a87e8a4c37748b5080a26b806a707f25a902c040a8"}, + {file = 
"pydantic_core-2.18.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b7cdf28938ac6b8b49ae5e92f2735056a7ba99c9b110a474473fd71185c1af5d"}, + {file = "pydantic_core-2.18.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:938cb21650855054dc54dfd9120a851c974f95450f00683399006aa6e8abb057"}, + {file = "pydantic_core-2.18.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:44cd83ab6a51da80fb5adbd9560e26018e2ac7826f9626bc06ca3dc074cd198b"}, + {file = "pydantic_core-2.18.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:972658f4a72d02b8abfa2581d92d59f59897d2e9f7e708fdabe922f9087773af"}, + {file = "pydantic_core-2.18.4-cp38-none-win32.whl", hash = "sha256:1d886dc848e60cb7666f771e406acae54ab279b9f1e4143babc9c2258213daa2"}, + {file = "pydantic_core-2.18.4-cp38-none-win_amd64.whl", hash = "sha256:bb4462bd43c2460774914b8525f79b00f8f407c945d50881568f294c1d9b4443"}, + {file = "pydantic_core-2.18.4-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:44a688331d4a4e2129140a8118479443bd6f1905231138971372fcde37e43528"}, + {file = "pydantic_core-2.18.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a2fdd81edd64342c85ac7cf2753ccae0b79bf2dfa063785503cb85a7d3593223"}, + {file = "pydantic_core-2.18.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:86110d7e1907ab36691f80b33eb2da87d780f4739ae773e5fc83fb272f88825f"}, + {file = "pydantic_core-2.18.4-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:46387e38bd641b3ee5ce247563b60c5ca098da9c56c75c157a05eaa0933ed154"}, + {file = "pydantic_core-2.18.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:123c3cec203e3f5ac7b000bd82235f1a3eced8665b63d18be751f115588fea30"}, + {file = "pydantic_core-2.18.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dc1803ac5c32ec324c5261c7209e8f8ce88e83254c4e1aebdc8b0a39f9ddb443"}, + {file = 
"pydantic_core-2.18.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:53db086f9f6ab2b4061958d9c276d1dbe3690e8dd727d6abf2321d6cce37fa94"}, + {file = "pydantic_core-2.18.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:abc267fa9837245cc28ea6929f19fa335f3dc330a35d2e45509b6566dc18be23"}, + {file = "pydantic_core-2.18.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a0d829524aaefdebccb869eed855e2d04c21d2d7479b6cada7ace5448416597b"}, + {file = "pydantic_core-2.18.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:509daade3b8649f80d4e5ff21aa5673e4ebe58590b25fe42fac5f0f52c6f034a"}, + {file = "pydantic_core-2.18.4-cp39-none-win32.whl", hash = "sha256:ca26a1e73c48cfc54c4a76ff78df3727b9d9f4ccc8dbee4ae3f73306a591676d"}, + {file = "pydantic_core-2.18.4-cp39-none-win_amd64.whl", hash = "sha256:c67598100338d5d985db1b3d21f3619ef392e185e71b8d52bceacc4a7771ea7e"}, + {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:574d92eac874f7f4db0ca653514d823a0d22e2354359d0759e3f6a406db5d55d"}, + {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:1f4d26ceb5eb9eed4af91bebeae4b06c3fb28966ca3a8fb765208cf6b51102ab"}, + {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77450e6d20016ec41f43ca4a6c63e9fdde03f0ae3fe90e7c27bdbeaece8b1ed4"}, + {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d323a01da91851a4f17bf592faf46149c9169d68430b3146dcba2bb5e5719abc"}, + {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:43d447dd2ae072a0065389092a231283f62d960030ecd27565672bd40746c507"}, + {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:578e24f761f3b425834f297b9935e1ce2e30f51400964ce4801002435a1b41ef"}, + {file = 
"pydantic_core-2.18.4-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:81b5efb2f126454586d0f40c4d834010979cb80785173d1586df845a632e4e6d"}, + {file = "pydantic_core-2.18.4-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ab86ce7c8f9bea87b9d12c7f0af71102acbf5ecbc66c17796cff45dae54ef9a5"}, + {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:90afc12421df2b1b4dcc975f814e21bc1754640d502a2fbcc6d41e77af5ec312"}, + {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:51991a89639a912c17bef4b45c87bd83593aee0437d8102556af4885811d59f5"}, + {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:293afe532740370aba8c060882f7d26cfd00c94cae32fd2e212a3a6e3b7bc15e"}, + {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b48ece5bde2e768197a2d0f6e925f9d7e3e826f0ad2271120f8144a9db18d5c8"}, + {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:eae237477a873ab46e8dd748e515c72c0c804fb380fbe6c85533c7de51f23a8f"}, + {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:834b5230b5dfc0c1ec37b2fda433b271cbbc0e507560b5d1588e2cc1148cf1ce"}, + {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:e858ac0a25074ba4bce653f9b5d0a85b7456eaddadc0ce82d3878c22489fa4ee"}, + {file = "pydantic_core-2.18.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:2fd41f6eff4c20778d717af1cc50eca52f5afe7805ee530a4fbd0bae284f16e9"}, + {file = "pydantic_core-2.18.4.tar.gz", hash = "sha256:ec3beeada09ff865c344ff3bc2f427f5e6c26401cc6113d77e372c3fdac73864"}, ] [package.dependencies] @@ -2797,13 +2824,13 @@ windows-terminal = ["colorama (>=0.4.6)"] [[package]] name = "pytest" -version = "8.2.1" +version = "8.2.2" description = "pytest: simple powerful testing with Python" optional = false python-versions = 
">=3.8" files = [ - {file = "pytest-8.2.1-py3-none-any.whl", hash = "sha256:faccc5d332b8c3719f40283d0d44aa5cf101cec36f88cde9ed8f2bc0538612b1"}, - {file = "pytest-8.2.1.tar.gz", hash = "sha256:5046e5b46d8e4cac199c373041f26be56fdb81eb4e67dc11d4e10811fc3408fd"}, + {file = "pytest-8.2.2-py3-none-any.whl", hash = "sha256:c434598117762e2bd304e526244f67bf66bbd7b5d6cf22138be51ff661980343"}, + {file = "pytest-8.2.2.tar.gz", hash = "sha256:de4bb8104e201939ccdc688b27a89a7be2079b22e2bd2b07f806b6ba71117977"}, ] [package.dependencies] @@ -3060,6 +3087,111 @@ files = [ [package.dependencies] cffi = {version = "*", markers = "implementation_name == \"pypy\""} +[[package]] +name = "rapidfuzz" +version = "3.9.3" +description = "rapid fuzzy string matching" +optional = false +python-versions = ">=3.8" +files = [ + {file = "rapidfuzz-3.9.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bdb8c5b8e29238ec80727c2ba3b301efd45aa30c6a7001123a6647b8e6f77ea4"}, + {file = "rapidfuzz-3.9.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b3bd0d9632088c63a241f217742b1cf86e2e8ae573e01354775bd5016d12138c"}, + {file = "rapidfuzz-3.9.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:153f23c03d4917f6a1fc2fb56d279cc6537d1929237ff08ee7429d0e40464a18"}, + {file = "rapidfuzz-3.9.3-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a96c5225e840f1587f1bac8fa6f67562b38e095341576e82b728a82021f26d62"}, + {file = "rapidfuzz-3.9.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b777cd910ceecd738adc58593d6ed42e73f60ad04ecdb4a841ae410b51c92e0e"}, + {file = "rapidfuzz-3.9.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:53e06e4b81f552da04940aa41fc556ba39dee5513d1861144300c36c33265b76"}, + {file = "rapidfuzz-3.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c7ca5b6050f18fdcacdada2dc5fb7619ff998cd9aba82aed2414eee74ebe6cd"}, + {file = 
"rapidfuzz-3.9.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:87bb8d84cb41446a808c4b5f746e29d8a53499381ed72f6c4e456fe0f81c80a8"}, + {file = "rapidfuzz-3.9.3-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:959a15186d18425d19811bea86a8ffbe19fd48644004d29008e636631420a9b7"}, + {file = "rapidfuzz-3.9.3-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:a24603dd05fb4e3c09d636b881ce347e5f55f925a6b1b4115527308a323b9f8e"}, + {file = "rapidfuzz-3.9.3-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:0d055da0e801c71dd74ba81d72d41b2fa32afa182b9fea6b4b199d2ce937450d"}, + {file = "rapidfuzz-3.9.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:875b581afb29a7213cf9d98cb0f98df862f1020bce9d9b2e6199b60e78a41d14"}, + {file = "rapidfuzz-3.9.3-cp310-cp310-win32.whl", hash = "sha256:6073a46f61479a89802e3f04655267caa6c14eb8ac9d81a635a13805f735ebc1"}, + {file = "rapidfuzz-3.9.3-cp310-cp310-win_amd64.whl", hash = "sha256:119c010e20e561249b99ca2627f769fdc8305b07193f63dbc07bca0a6c27e892"}, + {file = "rapidfuzz-3.9.3-cp310-cp310-win_arm64.whl", hash = "sha256:790b0b244f3213581d42baa2fed8875f9ee2b2f9b91f94f100ec80d15b140ba9"}, + {file = "rapidfuzz-3.9.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f57e8305c281e8c8bc720515540e0580355100c0a7a541105c6cafc5de71daae"}, + {file = "rapidfuzz-3.9.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a4fc7b784cf987dbddc300cef70e09a92ed1bce136f7bb723ea79d7e297fe76d"}, + {file = "rapidfuzz-3.9.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5b422c0a6fe139d5447a0766268e68e6a2a8c2611519f894b1f31f0a392b9167"}, + {file = "rapidfuzz-3.9.3-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f50fed4a9b0c9825ff37cf0bccafd51ff5792090618f7846a7650f21f85579c9"}, + {file = "rapidfuzz-3.9.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b80eb7cbe62348c61d3e67e17057cddfd6defab168863028146e07d5a8b24a89"}, + {file = 
"rapidfuzz-3.9.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f45be77ec82da32ce5709a362e236ccf801615cc7163b136d1778cf9e31b14"}, + {file = "rapidfuzz-3.9.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd84b7f652a5610733400307dc732f57c4a907080bef9520412e6d9b55bc9adc"}, + {file = "rapidfuzz-3.9.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:3e6d27dad8c990218b8cd4a5c99cbc8834f82bb46ab965a7265d5aa69fc7ced7"}, + {file = "rapidfuzz-3.9.3-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:05ee0696ebf0dfe8f7c17f364d70617616afc7dafe366532730ca34056065b8a"}, + {file = "rapidfuzz-3.9.3-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:2bc8391749e5022cd9e514ede5316f86e332ffd3cfceeabdc0b17b7e45198a8c"}, + {file = "rapidfuzz-3.9.3-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:93981895602cf5944d89d317ae3b1b4cc684d175a8ae2a80ce5b65615e72ddd0"}, + {file = "rapidfuzz-3.9.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:754b719a4990735f66653c9e9261dcf52fd4d925597e43d6b9069afcae700d21"}, + {file = "rapidfuzz-3.9.3-cp311-cp311-win32.whl", hash = "sha256:14c9f268ade4c88cf77ab007ad0fdf63699af071ee69378de89fff7aa3cae134"}, + {file = "rapidfuzz-3.9.3-cp311-cp311-win_amd64.whl", hash = "sha256:bc1991b4cde6c9d3c0bbcb83d5581dc7621bec8c666c095c65b4277233265a82"}, + {file = "rapidfuzz-3.9.3-cp311-cp311-win_arm64.whl", hash = "sha256:0c34139df09a61b1b557ab65782ada971b4a3bce7081d1b2bee45b0a52231adb"}, + {file = "rapidfuzz-3.9.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5d6a210347d6e71234af5c76d55eeb0348b026c9bb98fe7c1cca89bac50fb734"}, + {file = "rapidfuzz-3.9.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b300708c917ce52f6075bdc6e05b07c51a085733650f14b732c087dc26e0aaad"}, + {file = "rapidfuzz-3.9.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:83ea7ca577d76778250421de61fb55a719e45b841deb769351fc2b1740763050"}, + {file = 
"rapidfuzz-3.9.3-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8319838fb5b7b5f088d12187d91d152b9386ce3979ed7660daa0ed1bff953791"}, + {file = "rapidfuzz-3.9.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:505d99131afd21529293a9a7b91dfc661b7e889680b95534756134dc1cc2cd86"}, + {file = "rapidfuzz-3.9.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c52970f7784518d7c82b07a62a26e345d2de8c2bd8ed4774e13342e4b3ff4200"}, + {file = "rapidfuzz-3.9.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:143caf7247449055ecc3c1e874b69e42f403dfc049fc2f3d5f70e1daf21c1318"}, + {file = "rapidfuzz-3.9.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b8ab0fa653d9225195a8ff924f992f4249c1e6fa0aea563f685e71b81b9fcccf"}, + {file = "rapidfuzz-3.9.3-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:57e7c5bf7b61c7320cfa5dde1e60e678d954ede9bb7da8e763959b2138391401"}, + {file = "rapidfuzz-3.9.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:51fa1ba84653ab480a2e2044e2277bd7f0123d6693051729755addc0d015c44f"}, + {file = "rapidfuzz-3.9.3-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:17ff7f7eecdb169f9236e3b872c96dbbaf116f7787f4d490abd34b0116e3e9c8"}, + {file = "rapidfuzz-3.9.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:afe7c72d3f917b066257f7ff48562e5d462d865a25fbcabf40fca303a9fa8d35"}, + {file = "rapidfuzz-3.9.3-cp312-cp312-win32.whl", hash = "sha256:e53ed2e9b32674ce96eed80b3b572db9fd87aae6742941fb8e4705e541d861ce"}, + {file = "rapidfuzz-3.9.3-cp312-cp312-win_amd64.whl", hash = "sha256:35b7286f177e4d8ba1e48b03612f928a3c4bdac78e5651379cec59f95d8651e6"}, + {file = "rapidfuzz-3.9.3-cp312-cp312-win_arm64.whl", hash = "sha256:e6e4b9380ed4758d0cb578b0d1970c3f32dd9e87119378729a5340cb3169f879"}, + {file = "rapidfuzz-3.9.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a39890013f6d5b056cc4bfdedc093e322462ece1027a57ef0c636537bdde7531"}, + {file = 
"rapidfuzz-3.9.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b5bc0fdbf419493163c5c9cb147c5fbe95b8e25844a74a8807dcb1a125e630cf"}, + {file = "rapidfuzz-3.9.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:efe6e200a75a792d37b960457904c4fce7c928a96ae9e5d21d2bd382fe39066e"}, + {file = "rapidfuzz-3.9.3-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:de077c468c225d4c18f7188c47d955a16d65f21aab121cbdd98e3e2011002c37"}, + {file = "rapidfuzz-3.9.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8f917eaadf5388466a95f6a236f678a1588d231e52eda85374077101842e794e"}, + {file = "rapidfuzz-3.9.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:858ba57c05afd720db8088a8707079e8d024afe4644001fe0dbd26ef7ca74a65"}, + {file = "rapidfuzz-3.9.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d36447d21b05f90282a6f98c5a33771805f9222e5d0441d03eb8824e33e5bbb4"}, + {file = "rapidfuzz-3.9.3-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:acbe4b6f1ccd5b90c29d428e849aa4242e51bb6cab0448d5f3c022eb9a25f7b1"}, + {file = "rapidfuzz-3.9.3-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:53c7f27cdf899e94712972237bda48cfd427646aa6f5d939bf45d084780e4c16"}, + {file = "rapidfuzz-3.9.3-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:6175682a829c6dea4d35ed707f1dadc16513270ef64436568d03b81ccb6bdb74"}, + {file = "rapidfuzz-3.9.3-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:5276df395bd8497397197fca2b5c85f052d2e6a66ffc3eb0544dd9664d661f95"}, + {file = "rapidfuzz-3.9.3-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:77b5c4f3e72924d7845f0e189c304270066d0f49635cf8a3938e122c437e58de"}, + {file = "rapidfuzz-3.9.3-cp38-cp38-win32.whl", hash = "sha256:8add34061e5cd561c72ed4febb5c15969e7b25bda2bb5102d02afc3abc1f52d0"}, + {file = "rapidfuzz-3.9.3-cp38-cp38-win_amd64.whl", hash = "sha256:604e0502a39cf8e67fa9ad239394dddad4cdef6d7008fdb037553817d420e108"}, + {file = 
"rapidfuzz-3.9.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:21047f55d674614eb4b0ab34e35c3dc66f36403b9fbfae645199c4a19d4ed447"}, + {file = "rapidfuzz-3.9.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a56da3aff97cb56fe85d9ca957d1f55dbac7c27da927a86a2a86d8a7e17f80aa"}, + {file = "rapidfuzz-3.9.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:964c08481aec2fe574f0062e342924db2c6b321391aeb73d68853ed42420fd6d"}, + {file = "rapidfuzz-3.9.3-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5e2b827258beefbe5d3f958243caa5a44cf46187eff0c20e0b2ab62d1550327a"}, + {file = "rapidfuzz-3.9.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c6e65a301fcd19fbfbee3a514cc0014ff3f3b254b9fd65886e8a9d6957fb7bca"}, + {file = "rapidfuzz-3.9.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cbe93ba1725a8d47d2b9dca6c1f435174859427fbc054d83de52aea5adc65729"}, + {file = "rapidfuzz-3.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aca21c0a34adee582775da997a600283e012a608a107398d80a42f9a57ad323d"}, + {file = "rapidfuzz-3.9.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:256e07d3465173b2a91c35715a2277b1ee3ae0b9bbab4e519df6af78570741d0"}, + {file = "rapidfuzz-3.9.3-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:802ca2cc8aa6b8b34c6fdafb9e32540c1ba05fca7ad60b3bbd7ec89ed1797a87"}, + {file = "rapidfuzz-3.9.3-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:dd789100fc852cffac1449f82af0da139d36d84fd9faa4f79fc4140a88778343"}, + {file = "rapidfuzz-3.9.3-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:5d0abbacdb06e27ff803d7ae0bd0624020096802758068ebdcab9bd49cf53115"}, + {file = "rapidfuzz-3.9.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:378d1744828e27490a823fc6fe6ebfb98c15228d54826bf4e49e4b76eb5f5579"}, + {file = "rapidfuzz-3.9.3-cp39-cp39-win32.whl", hash = "sha256:5d0cb272d43e6d3c0dedefdcd9d00007471f77b52d2787a4695e9dd319bb39d2"}, + {file = 
"rapidfuzz-3.9.3-cp39-cp39-win_amd64.whl", hash = "sha256:15e4158ac4b3fb58108072ec35b8a69165f651ba1c8f43559a36d518dbf9fb3f"}, + {file = "rapidfuzz-3.9.3-cp39-cp39-win_arm64.whl", hash = "sha256:58c6a4936190c558d5626b79fc9e16497e5df7098589a7e80d8bff68148ff096"}, + {file = "rapidfuzz-3.9.3-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:5410dc848c947a603792f4f51b904a3331cf1dc60621586bfbe7a6de72da1091"}, + {file = "rapidfuzz-3.9.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:282d55700a1a3d3a7980746eb2fcd48c9bbc1572ebe0840d0340d548a54d01fe"}, + {file = "rapidfuzz-3.9.3-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc1037507810833646481f5729901a154523f98cbebb1157ba3a821012e16402"}, + {file = "rapidfuzz-3.9.3-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5e33f779391caedcba2ba3089fb6e8e557feab540e9149a5c3f7fea7a3a7df37"}, + {file = "rapidfuzz-3.9.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41a81a9f311dc83d22661f9b1a1de983b201322df0c4554042ffffd0f2040c37"}, + {file = "rapidfuzz-3.9.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a93250bd8fae996350c251e1752f2c03335bb8a0a5b0c7e910a593849121a435"}, + {file = "rapidfuzz-3.9.3-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:3617d1aa7716c57d120b6adc8f7c989f2d65bc2b0cbd5f9288f1fc7bf469da11"}, + {file = "rapidfuzz-3.9.3-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:ad04a3f5384b82933213bba2459f6424decc2823df40098920856bdee5fd6e88"}, + {file = "rapidfuzz-3.9.3-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8709918da8a88ad73c9d4dd0ecf24179a4f0ceba0bee21efc6ea21a8b5290349"}, + {file = "rapidfuzz-3.9.3-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b770f85eab24034e6ef7df04b2bfd9a45048e24f8a808e903441aa5abde8ecdd"}, + {file = "rapidfuzz-3.9.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:930b4e6fdb4d914390141a2b99a6f77a52beacf1d06aa4e170cba3a98e24c1bc"}, + {file = "rapidfuzz-3.9.3-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:c8444e921bfc3757c475c4f4d7416a7aa69b2d992d5114fe55af21411187ab0d"}, + {file = "rapidfuzz-3.9.3-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:2c1d3ef3878f871abe6826e386c3d61b5292ef5f7946fe646f4206b85836b5da"}, + {file = "rapidfuzz-3.9.3-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:d861bf326ee7dabc35c532a40384541578cd1ec1e1b7db9f9ecbba56eb76ca22"}, + {file = "rapidfuzz-3.9.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cde6b9d9ba5007077ee321ec722fa714ebc0cbd9a32ccf0f4dd3cc3f20952d71"}, + {file = "rapidfuzz-3.9.3-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3bb6546e7b6bed1aefbe24f68a5fb9b891cc5aef61bca6c1a7b1054b7f0359bb"}, + {file = "rapidfuzz-3.9.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d8a57261ef7996d5ced7c8cba9189ada3fbeffd1815f70f635e4558d93766cb"}, + {file = "rapidfuzz-3.9.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:67201c02efc596923ad950519e0b75ceb78d524177ea557134d6567b9ac2c283"}, + {file = "rapidfuzz-3.9.3.tar.gz", hash = "sha256:b398ea66e8ed50451bce5997c430197d5e4b06ac4aa74602717f792d8d8d06e2"}, +] + +[package.extras] +full = ["numpy"] + [[package]] name = "referencing" version = "0.35.1" @@ -3165,13 +3297,13 @@ files = [ [[package]] name = "requests" -version = "2.32.2" +version = "2.32.3" description = "Python HTTP for Humans." 
optional = false python-versions = ">=3.8" files = [ - {file = "requests-2.32.2-py3-none-any.whl", hash = "sha256:fc06670dd0ed212426dfeb94fc1b983d917c4f9847c863f313c9dfaaffb7c23c"}, - {file = "requests-2.32.2.tar.gz", hash = "sha256:dd951ff5ecf3e3b3aa26b40703ba77495dab41da839ae72ef3c8e5d8e2433289"}, + {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, + {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, ] [package.dependencies] @@ -3292,6 +3424,83 @@ files = [ {file = "rpds_py-0.18.1.tar.gz", hash = "sha256:dc48b479d540770c811fbd1eb9ba2bb66951863e448efec2e2c102625328e92f"}, ] +[[package]] +name = "ruamel-yaml" +version = "0.18.6" +description = "ruamel.yaml is a YAML parser/emitter that supports roundtrip preservation of comments, seq/map flow style, and map key order" +optional = false +python-versions = ">=3.7" +files = [ + {file = "ruamel.yaml-0.18.6-py3-none-any.whl", hash = "sha256:57b53ba33def16c4f3d807c0ccbc00f8a6081827e81ba2491691b76882d0c636"}, + {file = "ruamel.yaml-0.18.6.tar.gz", hash = "sha256:8b27e6a217e786c6fbe5634d8f3f11bc63e0f80f6a5890f28863d9c45aac311b"}, +] + +[package.dependencies] +"ruamel.yaml.clib" = {version = ">=0.2.7", markers = "platform_python_implementation == \"CPython\" and python_version < \"3.13\""} + +[package.extras] +docs = ["mercurial (>5.7)", "ryd"] +jinja2 = ["ruamel.yaml.jinja2 (>=0.2)"] + +[[package]] +name = "ruamel-yaml-clib" +version = "0.2.8" +description = "C version of reader, parser and emitter for ruamel.yaml derived from libyaml" +optional = false +python-versions = ">=3.6" +files = [ + {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:b42169467c42b692c19cf539c38d4602069d8c1505e97b86387fcf7afb766e1d"}, + {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-macosx_13_0_arm64.whl", hash = 
"sha256:07238db9cbdf8fc1e9de2489a4f68474e70dffcb32232db7c08fa61ca0c7c462"}, + {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:fff3573c2db359f091e1589c3d7c5fc2f86f5bdb6f24252c2d8e539d4e45f412"}, + {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-manylinux_2_24_aarch64.whl", hash = "sha256:aa2267c6a303eb483de8d02db2871afb5c5fc15618d894300b88958f729ad74f"}, + {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:840f0c7f194986a63d2c2465ca63af8ccbbc90ab1c6001b1978f05119b5e7334"}, + {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:024cfe1fc7c7f4e1aff4a81e718109e13409767e4f871443cbff3dba3578203d"}, + {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-win32.whl", hash = "sha256:c69212f63169ec1cfc9bb44723bf2917cbbd8f6191a00ef3410f5a7fe300722d"}, + {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-win_amd64.whl", hash = "sha256:cabddb8d8ead485e255fe80429f833172b4cadf99274db39abc080e068cbcc31"}, + {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:bef08cd86169d9eafb3ccb0a39edb11d8e25f3dae2b28f5c52fd997521133069"}, + {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:b16420e621d26fdfa949a8b4b47ade8810c56002f5389970db4ddda51dbff248"}, + {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:25c515e350e5b739842fc3228d662413ef28f295791af5e5110b543cf0b57d9b"}, + {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-manylinux_2_24_aarch64.whl", hash = "sha256:1707814f0d9791df063f8c19bb51b0d1278b8e9a2353abbb676c2f685dee6afe"}, + {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:46d378daaac94f454b3a0e3d8d78cafd78a026b1d71443f4966c696b48a6d899"}, + {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-musllinux_1_1_x86_64.whl", hash = 
"sha256:09b055c05697b38ecacb7ac50bdab2240bfca1a0c4872b0fd309bb07dc9aa3a9"}, + {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-win32.whl", hash = "sha256:53a300ed9cea38cf5a2a9b069058137c2ca1ce658a874b79baceb8f892f915a7"}, + {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-win_amd64.whl", hash = "sha256:c2a72e9109ea74e511e29032f3b670835f8a59bbdc9ce692c5b4ed91ccf1eedb"}, + {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:ebc06178e8821efc9692ea7544aa5644217358490145629914d8020042c24aa1"}, + {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:edaef1c1200c4b4cb914583150dcaa3bc30e592e907c01117c08b13a07255ec2"}, + {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d176b57452ab5b7028ac47e7b3cf644bcfdc8cacfecf7e71759f7f51a59e5c92"}, + {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-manylinux_2_24_aarch64.whl", hash = "sha256:1dc67314e7e1086c9fdf2680b7b6c2be1c0d8e3a8279f2e993ca2a7545fecf62"}, + {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:3213ece08ea033eb159ac52ae052a4899b56ecc124bb80020d9bbceeb50258e9"}, + {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:aab7fd643f71d7946f2ee58cc88c9b7bfc97debd71dcc93e03e2d174628e7e2d"}, + {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-win32.whl", hash = "sha256:5c365d91c88390c8d0a8545df0b5857172824b1c604e867161e6b3d59a827eaa"}, + {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-win_amd64.whl", hash = "sha256:1758ce7d8e1a29d23de54a16ae867abd370f01b5a69e1a3ba75223eaa3ca1a1b"}, + {file = "ruamel.yaml.clib-0.2.8-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a5aa27bad2bb83670b71683aae140a1f52b0857a2deff56ad3f6c13a017a26ed"}, + {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c58ecd827313af6864893e7af0a3bb85fd529f862b6adbefe14643947cfe2942"}, + {file = 
"ruamel.yaml.clib-0.2.8-cp37-cp37m-macosx_12_0_arm64.whl", hash = "sha256:f481f16baec5290e45aebdc2a5168ebc6d35189ae6fea7a58787613a25f6e875"}, + {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-manylinux_2_24_aarch64.whl", hash = "sha256:77159f5d5b5c14f7c34073862a6b7d34944075d9f93e681638f6d753606c6ce6"}, + {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:7f67a1ee819dc4562d444bbafb135832b0b909f81cc90f7aa00260968c9ca1b3"}, + {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:4ecbf9c3e19f9562c7fdd462e8d18dd902a47ca046a2e64dba80699f0b6c09b7"}, + {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:87ea5ff66d8064301a154b3933ae406b0863402a799b16e4a1d24d9fbbcbe0d3"}, + {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-win32.whl", hash = "sha256:75e1ed13e1f9de23c5607fe6bd1aeaae21e523b32d83bb33918245361e9cc51b"}, + {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-win_amd64.whl", hash = "sha256:3f215c5daf6a9d7bbed4a0a4f760f3113b10e82ff4c5c44bec20a68c8014f675"}, + {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1b617618914cb00bf5c34d4357c37aa15183fa229b24767259657746c9077615"}, + {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:a6a9ffd280b71ad062eae53ac1659ad86a17f59a0fdc7699fd9be40525153337"}, + {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-manylinux_2_24_aarch64.whl", hash = "sha256:305889baa4043a09e5b76f8e2a51d4ffba44259f6b4c72dec8ca56207d9c6fe1"}, + {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:700e4ebb569e59e16a976857c8798aee258dceac7c7d6b50cab63e080058df91"}, + {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:e2b4c44b60eadec492926a7270abb100ef9f72798e18743939bdbf037aab8c28"}, + {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-musllinux_1_1_x86_64.whl", hash = 
"sha256:e79e5db08739731b0ce4850bed599235d601701d5694c36570a99a0c5ca41a9d"}, + {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-win32.whl", hash = "sha256:955eae71ac26c1ab35924203fda6220f84dce57d6d7884f189743e2abe3a9fbe"}, + {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-win_amd64.whl", hash = "sha256:56f4252222c067b4ce51ae12cbac231bce32aee1d33fbfc9d17e5b8d6966c312"}, + {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:03d1162b6d1df1caa3a4bd27aa51ce17c9afc2046c31b0ad60a0a96ec22f8001"}, + {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:bba64af9fa9cebe325a62fa398760f5c7206b215201b0ec825005f1b18b9bccf"}, + {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-manylinux_2_24_aarch64.whl", hash = "sha256:a1a45e0bb052edf6a1d3a93baef85319733a888363938e1fc9924cb00c8df24c"}, + {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:da09ad1c359a728e112d60116f626cc9f29730ff3e0e7db72b9a2dbc2e4beed5"}, + {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:184565012b60405d93838167f425713180b949e9d8dd0bbc7b49f074407c5a8b"}, + {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a75879bacf2c987c003368cf14bed0ffe99e8e85acfa6c0bfffc21a090f16880"}, + {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-win32.whl", hash = "sha256:84b554931e932c46f94ab306913ad7e11bba988104c5cff26d90d03f68258cd5"}, + {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-win_amd64.whl", hash = "sha256:25ac8c08322002b06fa1d49d1646181f0b2c72f5cbc15a85e80b4c30a544bb15"}, + {file = "ruamel.yaml.clib-0.2.8.tar.gz", hash = "sha256:beb2e0404003de9a4cab9753a8805a8fe9320ee6673136ed7f04255fe60bb512"}, +] + [[package]] name = "ruff" version = "0.3.7" @@ -3552,20 +3761,67 @@ transformers = ">=4.34.0,<5.0.0" dev = ["pre-commit", "pytest", "ruff (>=0.3.0)"] [[package]] -name = "setuptools" -version = "70.0.0" -description = "Easily download, build, install, upgrade, and uninstall Python packages" 
+name = "sentencepiece" +version = "0.2.0" +description = "SentencePiece python wrapper" optional = false -python-versions = ">=3.8" +python-versions = "*" files = [ - {file = "setuptools-70.0.0-py3-none-any.whl", hash = "sha256:54faa7f2e8d2d11bcd2c07bed282eef1046b5c080d1c32add737d7b5817b1ad4"}, - {file = "setuptools-70.0.0.tar.gz", hash = "sha256:f211a66637b8fa059bb28183da127d4e86396c991a942b028c6650d4319c3fd0"}, + {file = "sentencepiece-0.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:188779e1298a1c8b8253c7d3ad729cb0a9891e5cef5e5d07ce4592c54869e227"}, + {file = "sentencepiece-0.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bed9cf85b296fa2b76fc2547b9cbb691a523864cebaee86304c43a7b4cb1b452"}, + {file = "sentencepiece-0.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d7b67e724bead13f18db6e1d10b6bbdc454af574d70efbb36f27d90387be1ca3"}, + {file = "sentencepiece-0.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2fde4b08cfe237be4484c6c7c2e2c75fb862cfeab6bd5449ce4caeafd97b767a"}, + {file = "sentencepiece-0.2.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c378492056202d1c48a4979650981635fd97875a00eabb1f00c6a236b013b5e"}, + {file = "sentencepiece-0.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1380ce6540a368de2ef6d7e6ba14ba8f3258df650d39ba7d833b79ee68a52040"}, + {file = "sentencepiece-0.2.0-cp310-cp310-win32.whl", hash = "sha256:a1151d6a6dd4b43e552394aed0edfe9292820272f0194bd56c7c1660a0c06c3d"}, + {file = "sentencepiece-0.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:d490142b0521ef22bc1085f061d922a2a6666175bb6b42e588ff95c0db6819b2"}, + {file = "sentencepiece-0.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:17982700c4f6dbb55fa3594f3d7e5dd1c8659a274af3738e33c987d2a27c9d5c"}, + {file = "sentencepiece-0.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7c867012c0e8bcd5bdad0f791609101cb5c66acb303ab3270218d6debc68a65e"}, + {file = 
"sentencepiece-0.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7fd6071249c74f779c5b27183295b9202f8dedb68034e716784364443879eaa6"}, + {file = "sentencepiece-0.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27f90c55a65013cbb8f4d7aab0599bf925cde4adc67ae43a0d323677b5a1c6cb"}, + {file = "sentencepiece-0.2.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b293734059ef656dcd65be62ff771507bea8fed0a711b6733976e1ed3add4553"}, + {file = "sentencepiece-0.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e58b47f933aca74c6a60a79dcb21d5b9e47416256c795c2d58d55cec27f9551d"}, + {file = "sentencepiece-0.2.0-cp311-cp311-win32.whl", hash = "sha256:c581258cf346b327c62c4f1cebd32691826306f6a41d8c4bec43b010dee08e75"}, + {file = "sentencepiece-0.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:0993dbc665f4113017892f1b87c3904a44d0640eda510abcacdfb07f74286d36"}, + {file = "sentencepiece-0.2.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:ea5f536e32ea8ec96086ee00d7a4a131ce583a1b18d130711707c10e69601cb2"}, + {file = "sentencepiece-0.2.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d0cb51f53b6aae3c36bafe41e86167c71af8370a039f542c43b0cce5ef24a68c"}, + {file = "sentencepiece-0.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3212121805afc58d8b00ab4e7dd1f8f76c203ddb9dc94aa4079618a31cf5da0f"}, + {file = "sentencepiece-0.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2a3149e3066c2a75e0d68a43eb632d7ae728c7925b517f4c05c40f6f7280ce08"}, + {file = "sentencepiece-0.2.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:632f3594d3e7ac8b367bca204cb3fd05a01d5b21455acd097ea4c0e30e2f63d7"}, + {file = "sentencepiece-0.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f295105c6bdbb05bd5e1b0cafbd78ff95036f5d3641e7949455a3f4e5e7c3109"}, + {file = "sentencepiece-0.2.0-cp312-cp312-win32.whl", hash = 
"sha256:fb89f811e5efd18bab141afc3fea3de141c3f69f3fe9e898f710ae7fe3aab251"}, + {file = "sentencepiece-0.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:7a673a72aab81fef5ebe755c6e0cc60087d1f3a4700835d40537183c1703a45f"}, + {file = "sentencepiece-0.2.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:4547683f330289ec4f093027bfeb87f9ef023b2eb6f879fdc4a8187c7e0ffb90"}, + {file = "sentencepiece-0.2.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7cd6175f7eaec7142d2bf6f6597ce7db4c9ac89acf93fcdb17410c3a8b781eeb"}, + {file = "sentencepiece-0.2.0-cp36-cp36m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:859ba1acde782609a0910a26a60e16c191a82bf39b5621107552c0cd79fad00f"}, + {file = "sentencepiece-0.2.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bcbbef6cc277f8f18f36959e305f10b1c620442d75addc79c21d7073ae581b50"}, + {file = "sentencepiece-0.2.0-cp36-cp36m-win32.whl", hash = "sha256:536b934e244829e3fe6c4f198652cd82da48adb9aa145c9f00889542726dee3d"}, + {file = "sentencepiece-0.2.0-cp36-cp36m-win_amd64.whl", hash = "sha256:0a91aaa3c769b52440df56fafda683b3aa48e3f2169cf7ee5b8c8454a7f3ae9b"}, + {file = "sentencepiece-0.2.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:787e480ca4c1d08c9985a7eb1eae4345c107729c99e9b5a9a00f2575fc7d4b4b"}, + {file = "sentencepiece-0.2.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f4d158189eb2ecffea3a51edf6d25e110b3678ec47f1a40f2d541eafbd8f6250"}, + {file = "sentencepiece-0.2.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d1e5ca43013e8935f25457a4fca47e315780172c3e821b4b13a890668911c792"}, + {file = "sentencepiece-0.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7140d9e5a74a0908493bb4a13f1f16a401297bd755ada4c707e842fbf6f0f5bf"}, + {file = "sentencepiece-0.2.0-cp37-cp37m-win32.whl", hash = "sha256:6cf333625234f247ab357b0bd9836638405ea9082e1543d5b8408f014979dcbf"}, + {file = 
"sentencepiece-0.2.0-cp37-cp37m-win_amd64.whl", hash = "sha256:ff88712338b01031910e8e61e7239aff3ce8869ee31a47df63cb38aadd591bea"}, + {file = "sentencepiece-0.2.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:20813a68d4c221b1849c62c30e1281ea81687894d894b8d4a0f4677d9311e0f5"}, + {file = "sentencepiece-0.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:926ef920ae2e8182db31d3f5d081ada57804e3e1d3a8c4ef8b117f9d9fb5a945"}, + {file = "sentencepiece-0.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:89f65f69636b7e9c015b79dff9c9985a9bc7d19ded6f79ef9f1ec920fdd73ecf"}, + {file = "sentencepiece-0.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f67eae0dbe6f2d7d6ba50a354623d787c99965f068b81e145d53240198021b0"}, + {file = "sentencepiece-0.2.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:98501e075f35dd1a1d5a20f65be26839fcb1938752ec61539af008a5aa6f510b"}, + {file = "sentencepiece-0.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e3d1d2cc4882e8d6a1adf9d5927d7716f80617fc693385661caff21888972269"}, + {file = "sentencepiece-0.2.0-cp38-cp38-win32.whl", hash = "sha256:b99a308a2e5e569031ab164b74e6fab0b6f37dfb493c32f7816225f4d411a6dd"}, + {file = "sentencepiece-0.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:cdb701eec783d3ec86b7cd4c763adad8eaf6b46db37ee1c36e5e6c44b3fe1b5f"}, + {file = "sentencepiece-0.2.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:1e0f9c4d0a6b0af59b613175f019916e28ade076e21242fd5be24340d8a2f64a"}, + {file = "sentencepiece-0.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:298f21cc1366eb60311aedba3169d30f885c363ddbf44214b0a587d2908141ad"}, + {file = "sentencepiece-0.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3f1ec95aa1e5dab11f37ac7eff190493fd87770f7a8b81ebc9dd768d1a3c8704"}, + {file = "sentencepiece-0.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b06b70af54daa4b4904cbb90b4eb6d35c9f3252fdc86c9c32d5afd4d30118d8"}, 
+ {file = "sentencepiece-0.2.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:22e37bac44dd6603388cb598c64ff7a76e41ca774646f21c23aadfbf5a2228ab"}, + {file = "sentencepiece-0.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0461324897735512a32d222e3d886e24ad6a499761952b6bda2a9ee6e4313ea5"}, + {file = "sentencepiece-0.2.0-cp39-cp39-win32.whl", hash = "sha256:38aed822fb76435fa1f12185f10465a94ab9e51d5e8a9159e9a540ce926f0ffd"}, + {file = "sentencepiece-0.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:d8cf876516548b5a1d6ac4745d8b554f5c07891d55da557925e5c13ff0b4e6ad"}, + {file = "sentencepiece-0.2.0.tar.gz", hash = "sha256:a52c19171daaf2e697dc6cbe67684e0fa341b1248966f6aebb541de654d15843"}, ] -[package.extras] -docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] -testing = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mypy (==1.9)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.1)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (>=0.2.1)", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] - [[package]] name = "six" version = "1.16.0" @@ -3646,6 +3902,29 @@ numpy = "*" docs = ["linkify-it-py", "myst-parser", "sphinx", "sphinx-book-theme"] test = ["pytest"] +[[package]] +name = "speechbrain" +version = "1.0.0" +description = "All-in-one speech toolkit in pure Python and Pytorch" +optional = false +python-versions = ">=3.7" +files = [ + {file = 
"speechbrain-1.0.0-py3-none-any.whl", hash = "sha256:3f163958fc5a6dc05851700e8cee9d828110fbb7a8defec911f25849ce33f45f"}, + {file = "speechbrain-1.0.0.tar.gz", hash = "sha256:f44797a23e0351f6ebf5ffd323bf44857431b2ffa55ec8e0c9f66e2ac6f6cea7"}, +] + +[package.dependencies] +huggingface-hub = "*" +hyperpyyaml = "*" +joblib = "*" +numpy = "*" +packaging = "*" +scipy = "*" +sentencepiece = "*" +torch = ">=1.9" +torchaudio = "*" +tqdm = "*" + [[package]] name = "stack-data" version = "0.6.3" @@ -3667,17 +3946,17 @@ tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"] [[package]] name = "sympy" -version = "1.12" +version = "1.12.1" description = "Computer algebra system (CAS) in Python" optional = false python-versions = ">=3.8" files = [ - {file = "sympy-1.12-py3-none-any.whl", hash = "sha256:c3588cd4295d0c0f603d0f2ae780587e64e2efeedb3521e46b9bb1d08d184fa5"}, - {file = "sympy-1.12.tar.gz", hash = "sha256:ebf595c8dac3e0fdc4152c51878b498396ec7f30e7a914d6071e674d49420fb8"}, + {file = "sympy-1.12.1-py3-none-any.whl", hash = "sha256:9b2cbc7f1a640289430e13d2a56f02f867a1da0190f2f99d8968c2f74da0e515"}, + {file = "sympy-1.12.1.tar.gz", hash = "sha256:2877b03f998cd8c08f07cd0de5b767119cd3ef40d09f41c30d722f6686b0fb88"}, ] [package.dependencies] -mpmath = ">=0.19" +mpmath = ">=1.1.0,<1.4.0" [[package]] name = "threadpoolctl" @@ -4050,13 +4329,13 @@ test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0, [[package]] name = "transformers" -version = "4.41.1" +version = "4.41.2" description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" optional = false python-versions = ">=3.8.0" files = [ - {file = "transformers-4.41.1-py3-none-any.whl", hash = "sha256:f0680e0b1a01067eccd11f62f0522409422c7d6f91d532fe0f50b136a406129d"}, - {file = "transformers-4.41.1.tar.gz", hash = "sha256:fa859e4c66f0896633a3bf534e0d9a29a9a88478a49f94c5d8270537dc61cc42"}, + {file = "transformers-4.41.2-py3-none-any.whl", hash = 
"sha256:05555d20e43f808de1ef211ab64803cdb513170cef70d29a888b589caebefc67"}, + {file = "transformers-4.41.2.tar.gz", hash = "sha256:80a4db216533d573e9cc7388646c31ed9480918feb7c55eb211249cb23567f87"}, ] [package.dependencies] @@ -4139,13 +4418,13 @@ tutorials = ["matplotlib", "pandas", "tabulate", "torch"] [[package]] name = "typing-extensions" -version = "4.12.0" +version = "4.12.1" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" files = [ - {file = "typing_extensions-4.12.0-py3-none-any.whl", hash = "sha256:b349c66bea9016ac22978d800cfff206d5f9816951f12a7d0ec5578b0a819594"}, - {file = "typing_extensions-4.12.0.tar.gz", hash = "sha256:8cbcdc8606ebcb0d95453ad7dc5065e6237b6aa230a31e81d0f440c30fed5fd8"}, + {file = "typing_extensions-4.12.1-py3-none-any.whl", hash = "sha256:6024b58b69089e5a89c347397254e35f1bf02a907728ec7fee9bf0fe837d203a"}, + {file = "typing_extensions-4.12.1.tar.gz", hash = "sha256:915f5e35ff76f56588223f15fdd5938f9a1cf9195c0de25130c627e4d597f6d1"}, ] [[package]] @@ -4429,20 +4708,20 @@ multidict = ">=4.0" [[package]] name = "zipp" -version = "3.19.0" +version = "3.19.2" description = "Backport of pathlib-compatible object wrapper for zip files" optional = false python-versions = ">=3.8" files = [ - {file = "zipp-3.19.0-py3-none-any.whl", hash = "sha256:96dc6ad62f1441bcaccef23b274ec471518daf4fbbc580341204936a5a3dddec"}, - {file = "zipp-3.19.0.tar.gz", hash = "sha256:952df858fb3164426c976d9338d3961e8e8b3758e2e059e0f754b8c4262625ee"}, + {file = "zipp-3.19.2-py3-none-any.whl", hash = "sha256:f091755f667055f2d02b32c53771a7a6c8b47e1fdbc4b72a8b9072b3eef8015c"}, + {file = "zipp-3.19.2.tar.gz", hash = "sha256:bf1dcf6450f873a13e952a29504887c89e6de7506209e5b1bcc3460135d4de19"}, ] [package.extras] -docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -testing = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", 
"more-itertools", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "37f8aaa4ac3c3d0fd9cb8e1d69b5419f93196e9647721f7e2aec53342e0981f6" +content-hash = "29a57eaee26c5cc55f49ab5ebb6c003a40941a2f277fd344248f82f10f858bae" diff --git a/pyproject.toml b/pyproject.toml index cd17d839..2ea59107 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "senselab" -version = "0.1.2.dev23+c952ded" +version = "0.0.1" description = "Senselab is a Python package that simplifies building pipelines for speech and voice analysis." 
authors = [ "Fabio Catania ", @@ -28,7 +28,7 @@ packages = [{include = "senselab", from = "src"}] python = "^3.10" click = "^8.1.7" jsonschema = "^4.21.1" -datasets = "^2.18.0" +datasets = "^2.19.2" torch = "^2.2.2" torchvision = "^0.17.2" torchaudio = "^2.2.2" @@ -38,21 +38,23 @@ soundfile = "^0.12.1" ffmpeg-python = "^0.2.0" ipykernel = "^6.29.4" pydra = "^0.23" -pydantic = "^2.7.1" +pydantic = "^2.7.3" accelerate = "^0.29.3" -huggingface-hub = "^0.23.0" +huggingface-hub = "^0.23.3" praat-parselmouth = "^0.4.3" iso-639 = {git = "https://github.com/noumar/iso639.git", tag = "0.4.5"} opensmile = "^2.5.0" audiomentations = "^0.35.0" torch-audiomentations = "^0.11.1" sentence-transformers = "^2.7.0" +jiwer = "^3.0.4" +speechbrain = "^1.0.0" [tool.poetry.group.dev] optional = true [tool.poetry.group.dev.dependencies] -pytest = "^8.1.1" +pytest = "^8.2.2" pytest-mock = "^3.14.0" mypy = "^1.9.0" pre-commit = "^3.7.0" @@ -79,6 +81,9 @@ testpaths = [ [tool.mypy] ignore_missing_imports = true +plugins = [ + "pydantic.mypy" +] [tool.ruff] exclude = [ @@ -104,7 +109,7 @@ exclude = [ "node_modules", "venv" ] -line-length = 80 +line-length = 120 indent-width = 4 src = ["src"] target-version = "py310" @@ -140,10 +145,10 @@ pattern = "default-unprefixed" [tool.codespell] skip = [ - "./poetry.lock", - "./docs_style/pdoc-theme/syntax-highlighting.css" + "poetry.lock", + "docs_style/pdoc-theme/syntax-highlighting.css" ] -ignore-words-list = ["senselab"] +ignore-words-list = ["senselab", "nd", "astroid", "wil"] [build-system] requires = ["poetry-core>=1.0.0", "poetry-dynamic-versioning>=1.0.0,<2.0.0"] diff --git a/scripts/experiment1.py b/scripts/experiment1.py deleted file mode 100644 index d1eb75b6..00000000 --- a/scripts/experiment1.py +++ /dev/null @@ -1,170 +0,0 @@ -"""This script is used to test the audio tasks.""" - -from typing import Any, Dict - -import pydra - -from senselab.audio.tasks.preprocessing import resample_hf_dataset -from senselab.audio.tasks.preprocessing_pydra 
import resample_hf_dataset_pt -from senselab.audio.tasks.speech_to_text import transcribe_dataset_with_hf -from senselab.audio.tasks.speech_to_text_pydra import ( - transcribe_dataset_with_hf_pt, -) -from senselab.utils.decorators import get_response_time -from senselab.utils.tasks.input_output import read_files_from_disk -from senselab.utils.tasks.input_output_pydra import read_files_from_disk_pt - - -@get_response_time -def workflow(data: Dict[str, Any]) -> None: - """This function reads files from disk and transcribes them with Whisper.""" - print("Starting to read files from disk...") - dataset = read_files_from_disk(data["files"]) - print(f"Dataset loaded with {len(dataset)} records.") - - print("Resampling dataset...") - dataset = resample_hf_dataset(dataset, 16000) - print("Resampled dataset.") - - """ - print("Pushing dataset to the hub...") - push_dataset_to_hub(dataset, - remote_repository="fabiocat/test", - split="train") - print("Dataset pushed to the hub successfully.") - """ - - print("Transcribing dataset...") - _ = transcribe_dataset_with_hf( - dataset=dataset, model_id="openai/whisper-tiny", language="en" - ) # facebook/wav2vec2-base-960h - print("Transcribed dataset.") - - """ - print("Pushing dataset to the hub...") - push_dataset_to_hub(transcript_dataset, - remote_repository="fabiocat/transcript") - print("Dataset pushed to the hub successfully.") - """ - - -@get_response_time -def pydra_workflow(data: Dict[str, Any]) -> None: - """This function reads files from disk and transcribes them with Whisper.""" - wf0 = pydra.Workflow(name="wf0", input_spec=["x"], x=data["files"]) - wf0.add( - read_files_from_disk_pt( - name="read_files_from_disk_name", files=wf0.lzin.x - ) - ) - wf0.add( - resample_hf_dataset_pt( - name="resample_hf_dataset_name", - dataset=wf0.read_files_from_disk_name.lzout.out, - resample_rate=16000, - ) - ) - - """ - wf0.add(push_dataset_to_hub_pt(name='push_audio_dataset_to_hub_name', - 
dataset=wf0.resample_hf_dataset_name.lzout.out, - remote_repository="fabiocat/test", - split="train") - ) - """ - - wf0.add( - transcribe_dataset_with_hf_pt( - name="transcribe_dataset_name", - dataset=wf0.resample_hf_dataset_name.lzout.out, - model_id="openai/whisper-tiny", - language="en", - ) - ) - - """ - wf0.add(push_dataset_to_hub_pt(name='push_transcript_dataset_to_hub_name', - dataset=wf0.transcribe_dataset_name.lzout.out, - remote_repository="fabiocat/transcript") - ) - """ - - wf0.set_output([("out", wf0.transcribe_dataset_name.lzout.out)]) - - # PYDRA RUN - with pydra.Submitter(plugin="serial") as sub: - sub(wf0) - - _ = wf0.result() - - -@get_response_time -def pydra_workflow2(data: Dict[str, Any]) -> None: - """This function reads files from disk and transcribes them with Whisper.""" - wf0 = pydra.Workflow(name="wf0", input_spec=["x"], x=data["files"]) - - wf0.add( - read_files_from_disk_pt( - name="read_files_from_disk_name", files=wf0.lzin.x - ).split("files", files=wf0.lzin.x) - ) - wf0.add( - resample_hf_dataset_pt( - name="resample_hf_dataset_name", - dataset=wf0.read_files_from_disk_name.lzout.out, - resample_rate=16000, - ) - ) - - """ - wf0.add(push_dataset_to_hub_pt(name='push_audio_dataset_to_hub_name', - dataset=wf0.resample_hf_dataset_name.lzout.out, - remote_repository="fabiocat/test", - split="train")) - """ - - wf0.add( - transcribe_dataset_with_hf_pt( - name="transcribe_dataset_name", - dataset=wf0.resample_hf_dataset_name.lzout.out, - model_id="openai/whisper-tiny", - language="en", - ) - ) - wf0.combine("x") - - """ - wf0.add(push_dataset_to_hub_pt(name='push_transcript_dataset_to_hub_name', - dataset=wf0.transcribe_dataset_name.lzout.out, - remote_repository="fabiocat/transcript")) - """ - - # TODO: create a dataset object from the combined transcripts - wf0.set_output([("out", wf0.transcribe_dataset_name.lzout.out)]) - - # PYDRA RUN - with pydra.Submitter(plugin="serial") as sub: - sub(wf0) - - _ = wf0.result() - - -data = { - 
"files": [ - "/Users/fabiocat/Documents/git/pp/senselab/src/tests/data_for_testing/audio_48khz_mono_16bits.wav", - "/Users/fabiocat/Documents/git/pp/senselab/src/tests/data_for_testing/audio_48khz_mono_16bits.wav", - "/Users/fabiocat/Documents/git/pp/senselab/src/tests/data_for_testing/audio_48khz_mono_16bits.wav", - "/Users/fabiocat/Documents/git/pp/senselab/src/tests/data_for_testing/audio_48khz_mono_16bits.wav", - "/Users/fabiocat/Documents/git/pp/senselab/src/tests/data_for_testing/audio_48khz_mono_16bits.wav", - "/Users/fabiocat/Documents/git/pp/senselab/src/tests/data_for_testing/audio_48khz_mono_16bits.wav", - "/Users/fabiocat/Documents/git/pp/senselab/src/tests/data_for_testing/audio_48khz_mono_16bits.wav", - "/Users/fabiocat/Documents/git/pp/senselab/src/tests/data_for_testing/audio_48khz_mono_16bits.wav", - "/Users/fabiocat/Documents/git/pp/senselab/src/tests/data_for_testing/audio_48khz_mono_16bits.wav", - ] -} - -workflow(data) -print("\n\n") -pydra_workflow(data) -print("\n\n") -pydra_workflow2(data) diff --git a/scripts/experiment2.py b/scripts/experiment2.py deleted file mode 100644 index 0ab1f7d4..00000000 --- a/scripts/experiment2.py +++ /dev/null @@ -1,46 +0,0 @@ -"""This script is used to test the audio tasks.""" - -from senselab.audio.tasks.features_extraction.opensmile import ( - extract_feats_from_dataset, -) -from senselab.audio.tasks.features_extraction.praat_parselmouth import ( - get_hf_dataset_durations, - get_hf_dataset_f0_descriptors, - get_hf_dataset_harmonicity_descriptors, - get_hf_dataset_jitter_descriptors, - get_hf_dataset_shimmer_descriptors, -) -from senselab.utils.tasks.input_output import read_files_from_disk - -dataset = read_files_from_disk( - [ - "/Users/fabiocat/Documents/git/pp/senselab/src/tests/data_for_testing/audio_48khz_mono_16bits.wav" - ] -) - -print(dataset) - -duration_dataset = get_hf_dataset_durations(dataset) -f0_dataset = get_hf_dataset_f0_descriptors(dataset, f0min=100, f0max=500) -harmonicity_dataset = 
get_hf_dataset_harmonicity_descriptors(dataset, f0min=100) -jitter_dataset = get_hf_dataset_jitter_descriptors( - dataset, f0min=100, f0max=500 -) -shimmer_dataset = get_hf_dataset_shimmer_descriptors( - dataset, f0min=100, f0max=500 -) - -print(duration_dataset) -print(f0_dataset) -print(harmonicity_dataset) -print(jitter_dataset) -print(shimmer_dataset) - -opensmile_feats = extract_feats_from_dataset( - dataset, - audio_column="audio", - feature_set="eGeMAPSv02", - feature_level="Functionals", -) - -print(opensmile_feats) diff --git a/scripts/experiment3.py b/scripts/experiment3.py deleted file mode 100644 index 85da5002..00000000 --- a/scripts/experiment3.py +++ /dev/null @@ -1,9 +0,0 @@ -"""This script is used to test the video tasks.""" - -from senselab.video.input_output import extract_audios_from_local_videos - -files = ["../src/tests/data_for_testing/video_48khz_stereo_16bits.mp4"] -dataset = extract_audios_from_local_videos(files) - -print("dataset") -print(dataset) diff --git a/scripts/experiment4.py b/scripts/experiment4.py deleted file mode 100644 index b14eb339..00000000 --- a/scripts/experiment4.py +++ /dev/null @@ -1,17 +0,0 @@ -"""This script is used to test the voice cloning task.""" - -from senselab.audio.tasks.preprocessing import resample_hf_dataset -from senselab.audio.tasks.voice_cloning import clone_voice_in_dataset_with_KNNVC -from senselab.utils.tasks.input_output import read_files_from_disk - -files = ["../src/tests/data_for_testing/audio_48khz_mono_16bits.wav"] -dataset = read_files_from_disk(files) - -print("Resampling dataset...") -dataset = resample_hf_dataset(dataset, 16000) -print("Resampled dataset.") - -cloned_dataset = clone_voice_in_dataset_with_KNNVC(dataset, dataset) - -print("cloned_dataset") -# print(cloned_dataset) diff --git a/scripts/experiment5.py b/scripts/experiment5.py deleted file mode 100644 index 7e65e8b4..00000000 --- a/scripts/experiment5.py +++ /dev/null @@ -1,43 +0,0 @@ -"""This script is used to test the audio 
tasks.""" - -from typing import Any, Dict - -from torch_audiomentations import Compose, Gain, PolarityInversion - -from senselab.audio.tasks.data_augmentation import augment_hf_dataset -from senselab.utils.decorators import get_response_time -from senselab.utils.tasks.input_output import read_files_from_disk - - -@get_response_time -def workflow(data: Dict[str, Any], augmentation: Compose) -> None: - """This function reads files from disk and transcribes them with Whisper.""" - print("Starting to read files from disk...") - dataset = read_files_from_disk(data["files"]) - print(f"Dataset loaded with {len(dataset)} records.") - - print("Augmenting dataset...") - dataset = augment_hf_dataset(dataset, augmentation) - print("Augmented dataset.") - - -# Initialize augmentation callable -apply_augmentation = Compose( - transforms=[ - Gain( - min_gain_in_db=-15.0, - max_gain_in_db=5.0, - p=0.5, - ), - PolarityInversion(p=0.5), - ] -) - -data = { - "files": [ - "../src/tests/data_for_testing/audio_48khz_mono_16bits.wav", - "../src/tests/data_for_testing/audio_48khz_mono_16bits.wav", - ] -} - -workflow(data, apply_augmentation) diff --git a/scripts/experiment6.py b/scripts/experiment6.py deleted file mode 100644 index fdd36557..00000000 --- a/scripts/experiment6.py +++ /dev/null @@ -1,48 +0,0 @@ -"""This script is used to test the audio tasks.""" - -from typing import Any, Dict - -from senselab.audio.tasks.preprocessing import resample_hf_dataset -from senselab.audio.tasks.speech_to_text import transcribe_dataset_with_hf -from senselab.text.tasks.sentence_transofmers_embeddings_extraction import ( - extract_embeddings_from_hf_dataset, -) -from senselab.utils.decorators import get_response_time -from senselab.utils.tasks.input_output import read_files_from_disk - - -@get_response_time -def workflow(data: Dict[str, Any]) -> None: - """This function reads files from disk and transcribes them with Whisper.""" - print("Starting to read files from disk...") - dataset = 
read_files_from_disk(data["files"]) - print(f"Dataset loaded with {len(dataset)} records.") - - print("Resampling dataset...") - dataset = resample_hf_dataset(dataset, 16000) - print("Resampled dataset.") - - print("Transcribing dataset...") - transcript_dataset = transcribe_dataset_with_hf( - dataset=dataset, model_id="openai/whisper-tiny", language="en" - ) # facebook/wav2vec2-base-960h - print("Transcribed dataset.") - - print("Extracting embeddings...") - _ = extract_embeddings_from_hf_dataset( - transcript_dataset, - model_id="sentence-transformers/paraphrase-MiniLM-L6-v2", - text_column="asr", - ) - print("Extracted embeddings.") - - -data = { - "files": [ - "../src/tests/data_for_testing/audio_48khz_mono_16bits.wav", - "../src/tests/data_for_testing/audio_48khz_mono_16bits.wav", - ] -} - -workflow(data) -print("\n\n") diff --git a/scripts/pyannote_31_experiment.py b/scripts/pyannote_31_experiment.py deleted file mode 100644 index 84905b27..00000000 --- a/scripts/pyannote_31_experiment.py +++ /dev/null @@ -1,35 +0,0 @@ -"""Demonstrates use of pyannote_31.py with the PolyAI/minds14 dataset. 
- -PolyAI/minds14 dataset: -https://huggingface.co/datasets/PolyAI/minds14 -""" - - -import json - -from datasets import load_dataset - -from senselab.audio.tasks.preprocessing import resample_hf_dataset -from senselab.audio.tasks.pyannote_speaker_diarization import ( - pyannote_diarize, -) -from senselab.utils.tasks.input_output import _from_hf_dataset_to_dict - -HF_TOKEN = "YOUR HF_TOKEN" -dataset = load_dataset("PolyAI/minds14", "en-US", split="train") -dataset = dataset.select(range(4)) - -dataset = _from_hf_dataset_to_dict(dataset) - -print("Resampling dataset...") -dataset = resample_hf_dataset(dataset, 16000) -print("Resampled dataset.") - -print("Diarizing dataset...") -dataset_diarized = pyannote_diarize(dataset, - batched=True, - batch_size=2, - model_revision="3.1") -print("Diarized dataset.") - -print(json.dumps(dataset_diarized, indent=4)) diff --git a/src/senselab/audio/tasks/data_augmentation.py b/src/senselab/audio/tasks/data_augmentation.py index fc49c645..58ba61b1 100644 --- a/src/senselab/audio/tasks/data_augmentation.py +++ b/src/senselab/audio/tasks/data_augmentation.py @@ -1,54 +1,65 @@ """This module implements some utilities for audio data augmentation.""" -from typing import Any, Dict +from typing import List, Union import torch -from datasets import Dataset from torch_audiomentations import Compose -from senselab.utils.tasks.input_output import ( - _from_dict_to_hf_dataset, - _from_hf_dataset_to_dict, +from senselab.utils.data_structures.audio import ( + Audio, + batch_audios, + unbatch_audios, ) +from senselab.utils.device import DeviceType, _select_device_and_dtype -def augment_hf_dataset( - dataset: Dict[str, Any], augmentation: Compose, audio_column: str = "audio" -) -> Dict[str, Any]: - """Resamples a Hugging Face `Dataset` object.""" - hf_dataset = _from_dict_to_hf_dataset(dataset) - - def _augment_hf_row( - row: Dataset, augmentation: Compose, audio_column: str - ) -> Dict[str, Any]: - waveform = row[audio_column]["array"] - 
sampling_rate = row[audio_column]["sampling_rate"] - - # Ensure waveform is a PyTorch tensor - if not isinstance(waveform, torch.Tensor): - waveform = torch.tensor(waveform) - if waveform.dim() == 1: - waveform = waveform.unsqueeze(0).unsqueeze( - 0 - ) # [num_samples] -> [1, 1, num_samples] - elif waveform.dim() == 2: - waveform = waveform.unsqueeze( - 1 - ) # [batch_size, num_samples] -> [batch_size, 1, num_samples] - - augmented_hf_row = augmentation( - waveform, sample_rate=sampling_rate - ).squeeze() - - return { - "augmented_audio": { - "array": augmented_hf_row, - "sampling_rate": sampling_rate, - } - } - - augmented_hf_dataset = hf_dataset.map( - lambda x: _augment_hf_row(x, augmentation, audio_column) +def augment_audios( + audios: List[Audio], augmentation: Compose, device_options: Union[DeviceType, List[DeviceType]] = [DeviceType.CPU] +) -> List[Audio]: + """Augments all provided audios with a given augmentation, either individually or all batched together. + + Augment all audios with a user defined augmentation that can be a composition of multiple augmentations. This + augmentation is either performed on each audio individually or all of the audios provided are batched together + and run at once. NOTE: if batching, all audios must have the same sampling rate. + + Args: + audios: List of Audios whose data will be augmented with the given augmentations + augmentation: A Composition of augmentations to run on each audio (uses torch-audiomentations), should have its + output_type set to "dict" + device_options: The device, or a List of possible devices, to use for augmenting. If the chosen device + is MPS or CUDA then the audios are all batched together, so for optimal performance, batching should + be done by passing a batch_size worth of audios ar a time + + Returns: + List of audios that has passed the all of input audios through the provided augmentation. This does + not necessarily mean that the augmentation has been run on every audio. 
For more information, + see the torch-audiomentations documentation. + """ + augmentation.output_type = "dict" + new_audios = [] + device_type, dtype = _select_device_and_dtype( + compatible_devices=device_options if isinstance(device_options, List) else [device_options] ) - augmented_hf_dataset = augmented_hf_dataset.remove_columns([audio_column]) - return _from_hf_dataset_to_dict(augmented_hf_dataset) + if device_type == DeviceType.CPU: + for audio in audios: + audio_to_augment = audio.waveform.unsqueeze(0) + augmented_audio = augmentation(audio_to_augment, sample_rate=audio.sampling_rate).samples + new_audios.append( + Audio( + waveform=torch.squeeze(augmented_audio), + sampling_rate=audio.sampling_rate, + metadata=audio.metadata.copy(), + orig_path_or_id=audio.orig_path_or_id, + ) + ) + else: + batched_audios, sampling_rates, metadatas = batch_audios(audios) + + batched_audios = batched_audios.to(device=torch.device(str(device_type)), dtype=dtype) + sampling_rate = sampling_rates[0] if isinstance(sampling_rates, List) else sampling_rates + augmented_audio = augmentation(batched_audios, sample_rate=sampling_rate).samples + + augmented_audio = augmented_audio.detach().cpu() + return unbatch_audios(augmented_audio, sampling_rates, metadatas) + + return new_audios diff --git a/src/senselab/audio/tasks/features_extraction/torchaudio.py b/src/senselab/audio/tasks/features_extraction/torchaudio.py new file mode 100644 index 00000000..53453ad5 --- /dev/null +++ b/src/senselab/audio/tasks/features_extraction/torchaudio.py @@ -0,0 +1 @@ +"""This module provides the implementation of torchaudio utilities for audio features extraction.""" diff --git a/src/senselab/audio/tasks/preprocessing.py b/src/senselab/audio/tasks/preprocessing.py index d30bbb37..06ff3db4 100644 --- a/src/senselab/audio/tasks/preprocessing.py +++ b/src/senselab/audio/tasks/preprocessing.py @@ -1,49 +1,135 @@ """This module implements some utilities for the preprocessing task.""" -from typing import 
Any, Dict +from typing import List, Tuple -import torch import torchaudio.functional as F -from datasets import Dataset - -from senselab.utils.tasks.input_output import ( - _from_dict_to_hf_dataset, - _from_hf_dataset_to_dict, -) - - -def resample_hf_dataset( - dataset: Dict[str, Any], resample_rate: int, rolloff: float = 0.99 -) -> Dict[str, Any]: - """Resamples a Hugging Face `Dataset` object.""" - hf_dataset = _from_dict_to_hf_dataset(dataset) - - def _resample_hf_row( - row: Dataset, resample_rate: int, rolloff: float = 0.99 - ) -> Dict[str, Any]: - """Resamples audio data in a hf dataset row. - - A lower rolloff will therefore reduce the amount of aliasing, - but it will also reduce some of the higher frequencies. - """ - waveform = row["audio"]["array"] - # Ensure waveform is a PyTorch tensor - if not isinstance(waveform, torch.Tensor): - waveform = torch.tensor(waveform) - sampling_rate = row["audio"]["sampling_rate"] - - resampled_waveform = F.resample( - waveform, sampling_rate, resample_rate, rolloff=rolloff + +from senselab.utils.data_structures.audio import Audio + + +def resample_audios(audios: List[Audio], resample_rate: int, rolloff: float = 0.99) -> List[Audio]: + """Resamples all Audios to a given sampling rate. + + Takes a list of audios and resamples each into the new sampling rate. Notably does not assume any + specific structure of the audios (can vary in stereo vs. mono as well as their original sampling rate) + + Args: + audios: List of Audios to resample + resample_rate: Rate at which to resample the Audio + rolloff: The roll-off frequency of the filter, as a fraction of the Nyquist. 
+ Lower values reduce anti-aliasing, but also reduce some of the highest frequencies + + Returns: + List of Audios that have all been resampled to the given resampling rate + """ + resampled_audios = [] + for audio in audios: + new_metadata = audio.metadata.copy() + new_metadata_pre_proc = new_metadata.setdefault("preprocessing", []) + new_metadata_pre_proc.append(f"resample_{audio.sampling_rate}_to_{resample_rate}") + + resampled = F.resample(audio.waveform, audio.sampling_rate, resample_rate, rolloff=rolloff) + resampled_audios.append( + Audio( + waveform=resampled, + sampling_rate=resample_rate, + metadata=new_metadata, + orig_path_or_id=audio.orig_path_or_id, + ) ) + return resampled_audios + + +def downmix_audios_to_mono(audios: List[Audio]) -> List[Audio]: + """Downmixes a list of Audio objects to mono by averaging all channels. + + Args: + audios (List[Audio]): A list of Audio objects with a tensor representing the audio waveform. + Shape: (num_channels, num_samples). + + Returns: + List[Audio]: The list of audio objects with a mono waveform averaged from all channels. Shape: (num_samples). + """ + down_mixed_audios = [] + for audio in audios: + new_metadata = audio.metadata.copy() + new_metadata_pre_proc = new_metadata.setdefault("preprocessing", []) + new_metadata_pre_proc.append("downmix_mono_averaging") + down_mixed_audios.append( + Audio( + waveform=audio.waveform.mean(dim=0, keepdim=True), + sampling_rate=audio.sampling_rate, + metadata=new_metadata, + orig_path_or_id=audio.orig_path_or_id, + ) + ) + + return down_mixed_audios + - return { - "audio": { - "array": resampled_waveform, - "sampling_rate": resample_rate, - } - } - - resampled_hf_dataset = hf_dataset.map( - lambda x: _resample_hf_row(x, resample_rate, rolloff) - ) - return _from_hf_dataset_to_dict(resampled_hf_dataset) +def select_channel_from_audios(audios: List[Audio], channel_index: int) -> List[Audio]: + """Selects a specific channel from a list of Audio objects. 
+ + Args: + audios (List[Audio]): A list of Audio objects with a tensor representing the audio waveform. + Shape: (num_channels, num_samples). + channel_index (int): The index of the channel to select. + + Returns: + List[Audio]: The list of audio objects with the selected channel. Shape: (1, num_samples). + """ + mono_channel_audios = [] + for audio in audios: + if audio.waveform.size(0) <= channel_index: # should consider how much sense negative values make + raise ValueError("channel_index should be valid") + + new_metadata = audio.metadata.copy() + new_metadata_pre_proc = new_metadata.setdefault("preprocessing", []) + new_metadata_pre_proc.append(f"downmix_mono_select_{channel_index}") + + mono_channel_audios.append( + Audio( + waveform=audio.waveform[channel_index, :], + sampling_rate=audio.sampling_rate, + metadata=new_metadata, + orig_path_or_id=audio.orig_path_or_id, + ) + ) + return mono_channel_audios + + +def chunk_audios(data: List[Tuple[Audio, Tuple[float, float]]]) -> List[Audio]: + """Chunks the input audios based on the start and end timestamp. + + Args: + data: List of tuples containing an Audio object and a tuple with start and end (in seconds) for chunking. 
+ + Returns: + List of Audios that have been chunked based on the provided timestamps + """ + chunked_audios = [] + + for audio, timestamps in data: + start, end = timestamps + if start < 0: + raise ValueError("Start time must be greater than or equal to 0.") + duration = audio.waveform.shape[1] / audio.sampling_rate + if end > duration: + raise ValueError(f"End time must be less than the duration of the audio file ({duration} seconds).") + start_sample = int(start * audio.sampling_rate) + end_sample = int(end * audio.sampling_rate) + chunked_waveform = audio.waveform[:, start_sample:end_sample] + + new_metadata = audio.metadata.copy() + new_metadata_pre_proc = new_metadata.setdefault("preprocessing", []) + new_metadata_pre_proc.append(f"chunk_{start}_{end}") + + chunked_audios.append( + Audio( + waveform=chunked_waveform, + sampling_rate=audio.sampling_rate, + metadata=new_metadata, + orig_path_or_id=audio.orig_path_or_id, + ) + ) + return chunked_audios diff --git a/src/senselab/audio/tasks/preprocessing_pydra.py b/src/senselab/audio/tasks/preprocessing_pydra.py index f3b122a7..c7c2b335 100644 --- a/src/senselab/audio/tasks/preprocessing_pydra.py +++ b/src/senselab/audio/tasks/preprocessing_pydra.py @@ -1,6 +1,15 @@ """This module defines a pydra API for the preprocessing task.""" + import pydra -from senselab.audio.tasks.preprocessing import resample_hf_dataset +from senselab.audio.tasks.preprocessing import ( + chunk_audios, + downmix_audios_to_mono, + resample_audios, + select_channel_from_audios, +) -resample_hf_dataset_pt = pydra.mark.task(resample_hf_dataset) +resample_audios_pt = pydra.mark.task(resample_audios) +downmix_audios_to_mono_pt = pydra.mark.task(downmix_audios_to_mono) +chunk_audios_pt = pydra.mark.task(chunk_audios) +select_channel_from_audios_pt = pydra.mark.task(select_channel_from_audios) diff --git a/src/senselab/audio/tasks/speech_to_text.py b/src/senselab/audio/tasks/speech_to_text.py index e49c9470..6387209a 100644 --- 
a/src/senselab/audio/tasks/speech_to_text.py +++ b/src/senselab/audio/tasks/speech_to_text.py @@ -6,7 +6,7 @@ from datasets import Dataset from transformers import pipeline -from senselab.utils.functions import DeviceType, _select_device_and_dtype +from senselab.utils.device import DeviceType, _select_device_and_dtype from senselab.utils.hf import HFModel from senselab.utils.tasks.input_output import ( _from_dict_to_hf_dataset, @@ -42,16 +42,9 @@ def _prepare_hf_asr_pipeline( ) -> pipeline: """Prepare a Hugging Face ASR pipeline.""" _ = HFModel(hf_model_id=model_id) # check HF model is valid - - if device is None: - device, torch_dtype = _select_device_and_dtype( - device_options=[DeviceType.CUDA, DeviceType.CPU] - ) - # MPS is not supported for now - else: - device, torch_dtype = _select_device_and_dtype( - device_options=[device] - ) + device, torch_dtype = _select_device_and_dtype( + user_preference=device, compatible_devices=[DeviceType.CUDA, DeviceType.CPU] + ) pipe = pipeline( "automatic-speech-recognition", diff --git a/src/senselab/audio/tasks/speech_to_text_evaluation.py b/src/senselab/audio/tasks/speech_to_text_evaluation.py new file mode 100644 index 00000000..b950dcb9 --- /dev/null +++ b/src/senselab/audio/tasks/speech_to_text_evaluation.py @@ -0,0 +1,88 @@ +"""This module implements some utilities for evaluating a transcription.""" + +import jiwer + + +def calculate_wer(reference: str, hypothesis: str) -> float: + """Calculate the Word Error Rate (WER) between the reference and hypothesis. + + Args: + reference (str): The ground truth text. + hypothesis (str): The predicted text. + + Returns: + float: The WER score. + + Examples: + >>> calculate_wer("hello world", "hello duck") + 0.5 + """ + return jiwer.wer(reference, hypothesis) + + +def calculate_mer(reference: str, hypothesis: str) -> float: + """Calculate the Match Error Rate (MER) between the reference and hypothesis. + + Args: + reference (str): The ground truth text. 
+ hypothesis (str): The predicted text. + + Returns: + float: The MER score. + + Examples: + >>> calculate_mer("hello world", "hello duck") + 0.5 + """ + return jiwer.mer(reference, hypothesis) + + +def calculate_wil(reference: str, hypothesis: str) -> float: + """Calculate the Word Information Lost (WIL) between the reference and hypothesis. + + Args: + reference (str): The ground truth text. + hypothesis (str): The predicted text. + + Returns: + float: The WIL score. + + Examples: + >>> calculate_wil("hello world", "hello duck") + 0.75 + """ + return jiwer.wil(reference, hypothesis) + + +def calculate_wip(reference: str, hypothesis: str) -> float: + """Calculate the Word Information Preserved (WIP) between the reference and hypothesis. + + Args: + reference (str): The ground truth text. + hypothesis (str): The predicted text. + + Returns: + float: The WIP score. + + Examples: + >>> calculate_wip("hello world", "hello duck") + 0.25 + """ + return jiwer.wip(reference, hypothesis) + + +def calculate_cer(reference: str, hypothesis: str) -> float: + """Calculate the Character Error Rate (CER) between the reference and hypothesis. + + Args: + reference (str): The ground truth text. + hypothesis (str): The predicted text. + + Returns: + float: The CER score. 
+ + Examples: + >>> calculate_cer("hello world", "hello duck") + 0.45454545454545453 + """ + return jiwer.cer(reference, hypothesis) diff --git a/src/senselab/audio/tasks/speech_to_text_evaluation_pydra.py b/src/senselab/audio/tasks/speech_to_text_evaluation_pydra.py new file mode 100644 index 00000000..5731500c --- /dev/null +++ b/src/senselab/audio/tasks/speech_to_text_evaluation_pydra.py @@ -0,0 +1,17 @@ +"""This module defines a pydra API for the speech to text evaluation task.""" + +import pydra + +from senselab.audio.tasks.speech_to_text_evaluation import ( + calculate_cer, + calculate_mer, + calculate_wer, + calculate_wil, + calculate_wip, +) + +calculate_wer_pt = pydra.mark.task(calculate_wer) +calculate_mer_pt = pydra.mark.task(calculate_mer) +calculate_wil_pt = pydra.mark.task(calculate_wil) +calculate_wip_pt = pydra.mark.task(calculate_wip) +calculate_cer_pt = pydra.mark.task(calculate_cer) diff --git a/src/senselab/audio/tasks/voice_cloning.py b/src/senselab/audio/tasks/voice_cloning.py index 75fa7948..5e033787 100644 --- a/src/senselab/audio/tasks/voice_cloning.py +++ b/src/senselab/audio/tasks/voice_cloning.py @@ -1,11 +1,11 @@ """This module implements some utilities for the voice cloning task.""" -from typing import Any, Dict, Optional +from typing import Any, Dict, Optional, Tuple import torch from datasets import Dataset -from senselab.utils.functions import DeviceType, _select_device_and_dtype +from senselab.utils.device import DeviceType, _select_device_and_dtype from senselab.utils.tasks.input_output import ( _from_dict_to_hf_dataset, _from_hf_dataset_to_dict, @@ -30,13 +30,11 @@ def _setup_knn_vc_model( model_revision: str, prematched_vocoder: bool, device: Optional[DeviceType] = None, - ) -> Any: # noqa: ANN401 + ) -> Tuple[object, DeviceType, torch.dtype]: """Prepare a KNNVC pipeline.""" repo_id = f"{model_id}:{model_revision}" device, torch_dtype = _select_device_and_dtype( - device_options=[device] - if device - else [DeviceType.CUDA, 
DeviceType.CPU] + user_preference=device, compatible_devices=[DeviceType.CUDA, DeviceType.CPU] ) knn_vc = torch.hub.load( repo_id, @@ -75,12 +73,8 @@ def _get_waveform(dataset: Dataset, column: str) -> torch.Tensor: out_wav = knn_vc_model.match(query_seq, matching_set, topk=topk) return {"cloned_waveform": out_wav} - hf_source_dataset = _from_dict_to_hf_dataset( - source_dataset, audio_columns=[source_audio_column] - ) - hf_target_dataset = _from_dict_to_hf_dataset( - target_dataset, audio_columns=[target_audio_column] - ) + hf_source_dataset = _from_dict_to_hf_dataset(source_dataset, audio_columns=[source_audio_column]) + hf_target_dataset = _from_dict_to_hf_dataset(target_dataset, audio_columns=[target_audio_column]) knn_vc, device, torch_dtype = _setup_knn_vc_model( model_id=model_id, diff --git a/src/senselab/utils/constants.py b/src/senselab/utils/constants.py new file mode 100644 index 00000000..cf3d616e --- /dev/null +++ b/src/senselab/utils/constants.py @@ -0,0 +1,5 @@ +"""Constants used by Senselab.""" + +import uuid + +SENSELAB_NAMESPACE = uuid.uuid3(uuid.NAMESPACE_URL, "https://github.com/sensein/senselab") diff --git a/src/senselab/utils/data_structures/audio.py b/src/senselab/utils/data_structures/audio.py new file mode 100644 index 00000000..611319e8 --- /dev/null +++ b/src/senselab/utils/data_structures/audio.py @@ -0,0 +1,158 @@ +"""Data structures relevant for audio tasks and pipelines. + +Contains data structures that are useful for audio tasks and pipelines that this package defines. +The most basic unit is an Audio object which represents the necessary information of a loaded audio +file and its corresponding metadata. Other functionality and abstract data types are provided for +ease of maintaining the codebase and offering consistent public APIs. 
+""" + +import uuid +from typing import Dict, List, Optional, Tuple, Union + +import numpy as np +import torch +import torchaudio +from pydantic import BaseModel, Field, ValidationInfo, field_validator + +from senselab.utils.constants import SENSELAB_NAMESPACE + + +class Audio(BaseModel): + """Pydantic model for audio and its corresponding metadata. + + Pydantic model for audio that holds the necessary attributes, the actual decoded audio data + and the sampling rate, to work with audio in python. Contains metadata information as needed + and has a unique identifier for every audio. + + Attributes: + waveform: The actual audio data read from an audio file, stored as a torch.Tensor + of shape (num_channels, num_samples) + sampling_rate: The sampling rate of the audio file + orig_path_or_id: Optional str for the original path or an ID to track file over time + metadata: Optional metadata dictionary of information associated with this Audio instance + (e.g. participant demographics, audio settings, location information) + """ + + waveform: torch.Tensor + sampling_rate: int + orig_path_or_id: Optional[str] = Field(default_factory=lambda: str(uuid.uuid4())) + metadata: Dict = Field(default={}) + model_config = {"arbitrary_types_allowed": True} + + @field_validator("waveform", mode="before") + def convert_to_tensor( + cls, v: Union[List[float], List[List[float]], np.ndarray, torch.Tensor], info: ValidationInfo + ) -> torch.Tensor: + """Converts the audio data to torch.Tensor of shape (num_channels, num_samples).""" + temporary_tensor = None + if isinstance(v, list): + temporary_tensor = torch.tensor(v) + elif isinstance(v, np.ndarray): + temporary_tensor = torch.tensor(v) + elif isinstance(v, torch.Tensor): + temporary_tensor = v + else: + raise ValueError("Unsupported data type") + + if len(temporary_tensor.shape) == 1: + # make the audio data [channels=1, samples] + temporary_tensor = temporary_tensor.unsqueeze(0) + return temporary_tensor + + @classmethod + def 
from_filepath(cls, filepath: str, metadata: Dict = {}) -> "Audio": + """Creates an Audio instance from an audio file. + + Args: + filepath: Filepath of the audio file to read from + metadata: Additional information associated with the audio file + """ + array, sampling_rate = torchaudio.load(filepath) + + return cls(waveform=array, sampling_rate=sampling_rate, orig_path_or_id=filepath, metadata=metadata) + + def id(self) -> str: + """Generate a unique identifier for the Audio. + + Generate a unique identifier for the Audio where equivalent waveforms and sampling + rates generate the same IDs. + + Returns: String UUID of the Audio generated by an MD5 hash of the waveform and the sampling_rate + """ + return str(uuid.uuid3(uuid.uuid3(SENSELAB_NAMESPACE, str(self.waveform)), str(self.sampling_rate))) + + def __eq__(self, other: object) -> bool: + """Overloads the default BaseModel equality to correctly check equivalence, ignoring metadata.""" + if isinstance(other, Audio): + return self.id() == other.id() + return False + + +def batch_audios(audios: List[Audio]) -> Tuple[torch.Tensor, Union[int, List[int]], List[Dict]]: + """Batches the Audios together into a single Tensor, keeping individual Audio information separate. + + Batch all of the Audios into a single Tensor of shape (len(audios), num_channels, num_samples). + Keeps the Audio information related to each sampling rate and metadata separate for each Audio to + allow for unbatching after running relevant functionality. + + Args: + audios: List of audios to batch together. NOTE: Should all have the same number of channels + and is generally advised to have the same sampling rates if running functionality + that relies on the sampling rate. + + Returns: + Returns a tuple of a Tensor that will have the shape (len(audios), num_channels, num_samples), + the sampling rate (an integer if all have the same sampling rate), and a list of each individual + audio's metadata information. 
+ + Raises: + RuntimeError: if all of the Audios do not have the same number of channels + """ + sampling_rates = [] + batched_audio = [] + metadatas = [] + for audio in audios: + sampling_rates.append(audio.sampling_rate) + batched_audio.append(audio.waveform) + metadatas.append(audio.metadata) + + return_sampling_rates: List[int] | int = int(sampling_rates[0]) if len(set(sampling_rates)) == 1 else sampling_rates + + return torch.stack(batched_audio), return_sampling_rates, metadatas + + +def unbatch_audios(batched_audio: torch.Tensor, sampling_rates: int | List[int], metadatas: List[Dict]) -> List[Audio]: + """Unbatches Audios into a List of Audio objects. + + Uses the batched Audios, their respective sampling rates, and their corresponding metadatas to create + a list of Audios. + + Args: + batched_audio: torch.Tensor of shape (batch_size, num_channels, num_samples) to unstack + sampling_rates: The sampling rate of each batched audio if they differ or a single sampling rate for all of them + metadatas: The respective metadata for each of the batched audios + + Returns: + List of Audio objects representing each of the Audios that were previously batched together + + Raises: + ValueError if the batched_audio is not in the correct shape or if the number of batched_audios does not + match the amount of metadata and sampling rates (if they were provided as a List) that were provided. 
+ """ + if len(batched_audio.shape) != 3: + raise ValueError("Expected batched audios to be of shape (batch_size, num_channels, samples)") + elif batched_audio.shape[0] != len(metadatas) or ( + isinstance(sampling_rates, List) and batched_audio.shape[0] != len(sampling_rates) + ): + raise ValueError( + "Expected sizes of batched_audio, sampling_rates (if provided as a litst) \ + and metadata to be equal" + ) + + audios = [] + for i in range(len(metadatas)): + sampling_rate = sampling_rates[i] if isinstance(sampling_rates, List) else sampling_rates + metadata = metadatas[i] + audio = batched_audio[i] + audios.append(Audio(waveform=audio, sampling_rate=sampling_rate, metadata=metadata)) + return audios diff --git a/src/senselab/utils/data_structures/dataset.py b/src/senselab/utils/data_structures/dataset.py new file mode 100644 index 00000000..51a4a595 --- /dev/null +++ b/src/senselab/utils/data_structures/dataset.py @@ -0,0 +1,217 @@ +"""Data structures relevant for managing datasets.""" + +import math +import uuid +from typing import Any, Dict, List, Union, no_type_check + +from pydantic import BaseModel, Field, ValidationInfo, field_validator + +from senselab.utils.data_structures.audio import Audio +from senselab.utils.data_structures.video import Video + + +class Participant(BaseModel): + """Data structure for a participant in a dataset.""" + + id: str = Field(default_factory=lambda: str(uuid.uuid4())) + metadata: Dict = Field(default={}) + + @field_validator("id", mode="before") + def set_id(cls, v: str) -> str: + """Set the unique id of the participant.""" + return v or str(uuid.uuid4()) + + def __eq__(self, other: object) -> bool: + """Overloads the default BaseModel equality to correctly check that ids are equivalent.""" + if isinstance(other, Participant): + return self.id == other.id + return False + + +class Session(BaseModel): + """Data structure for a session in a dataset.""" + + id: str = Field(default_factory=lambda: str(uuid.uuid4())) + metadata: 
Dict = Field(default={}) + + @field_validator("id", mode="before") + def set_id(cls, v: str) -> str: + """Set the unique id of the session.""" + return v or str(uuid.uuid4()) + + def __eq__(self, other: object) -> bool: + """Overloads the default BaseModel equality to correctly check that ids are equivalent.""" + if isinstance(other, Session): + return self.id == other.id + return False + + +class SenselabDataset(BaseModel): + """Class for maintaining SenseLab datasets and functionalities. + + Maintains collections of Audios, Videos, and metadata for use of the Senselab tools + and pipelines. Includes the ability to manage Sessions and Participants. + + Attributes: + audios: List of Audios that are generated based on list of audio filepaths + videos: List of Videos generated from a list of video filepaths + metadata: Metadata related to the dataset overall but not necessarily the metadata of + indivudal audios in the dataset + sessions: Session ID mapping to Session instance + participants: Mapping of participant ID to a Participant instance + """ + + participants: Dict[str, Participant] = Field(default_factory=dict) + sessions: Dict[str, Session] = Field(default_factory=dict) + audios: List[Audio] = [] + videos: List[Video] = [] + metadata: Dict = Field(default={}) + + @field_validator("audios", mode="before") + @classmethod + def generate_audios_from_filepaths(cls, v: Union[List[str], List[Audio]], _: ValidationInfo) -> List[Audio]: + """Generate the audios in the dataset from a list of audio filepaths. 
+ + Generates the audios in the dataset by taking in a list of audio filepaths + or a list of Audios + + Args: + v: Input for audios attribute that we're validating by generating the Audios if filepaths + are provided or just the list of Audios if pre-generated and passed in + + Returns: + List of Audios that instantiates the audios attribute in the dataset + """ + audio_list = [] + if len(v) == 0: + return [] + else: + for audio in v: + if isinstance(audio, Audio): + audio_list.append(audio) + else: + audio_list.append(Audio.from_filepath(audio)) + return audio_list + + @field_validator("videos", mode="before") + @classmethod + def generate_videos_from_filepaths(cls, v: Union[List[str], List[Video]], _: ValidationInfo) -> List[Video]: + """Generate the videos in the dataset from a list of video filepaths. + + Generates the videos in the dataset by taking in a list of video filepaths + or a list of Videos + + Args: + v: Input for videos attribute that we're validating by generating the Videos if filepaths + are provided or just the list of Videos if pre-generated and passed in + + Returns: + List of Videos that instantiates the videos attribute in the dataset + """ + video_list = [] + if len(v) == 0: + return [] + else: + for video in v: + if isinstance(video, Video): + video_list.append(video) + elif isinstance(video, str): + video_list.append(Video.from_filepath(video)) + + else: + raise ValueError("Unsupported video list") + return video_list + + @classmethod + @no_type_check + def create_bids_dataset(cls, bids_root_filepath: str) -> "SenselabDataset": + """Create a dataset from a BIDS organized directory. + + Creates a new dataset based off of a BIDS directory structure as defined at + https://sensein.group/biometrics-book/updated_bids.html + """ + pass + + def create_audio_split_for_pydra_task(self, batch_size: int = 1) -> List[List[Audio]]: + """Splits the audio data for Pydra tasks. 
+ + Creates a split of the audio data that can be used for creating individual Pydra tasks using + the .split functionality. Splits the data such that the inputs for a Pydra workflow are either + optimized for the GPU's batch size or a single Audio per CPU thread. + + Args: + batch_size: How to batch Audios for a Pydra task; defaults to 1 since CPU won't batch + + Returns: + List of Lists of Audio where each List of Audios will be an input to a Pydra task. + Each of the sublists are either of size 1 for CPUs or at most batch_size for GPU optimization. + + Raises: + ValueError if the batch size is invalid (less than 1) + """ + if batch_size > 1: + # Creates batches of at most size batch_size except the last which contains the remainder of audios + return [ + self.audios[batch_size * i : min(batch_size * (i + 1), len(self.audios))] + for i in range(math.ceil(len(self.audios) / batch_size)) + ] + elif batch_size < 1: + raise ValueError("Batch size must be greater than or equal to 1") + else: + return [[audio] for audio in self.audios] + + def audio_merge_from_pydra_task(self, audios_to_merge: List[List[Audio]]) -> None: + """Write later. 
+ + Logic Pydra: + audios: List of audios that want to give to task + split: List[List[Audios]] -> task List[Audio] + pydra task(List[Audio]) -> List[Audio] + merge(List[List[Audio]]) <- might be a wrapped instead of List of lists + TODO: Figure out what a merge behavior looks like from Pydra + """ + self.audios = [] + for audio_task_input in audios_to_merge: + for audio_output in audio_task_input: + self.audios.append(audio_output) + + @field_validator("participants", mode="before") + def check_unique_participant_id(cls, v: Dict[str, Participant], values: Any) -> Dict[str, Participant]: # noqa: ANN401 + """Check if participant IDs are unique.""" + print("type(values)") + print(type(values)) + input("Press Enter to continue...") + participants = values.get("participants", {}) + for participant_id, _ in v.items(): + if participant_id in participants: + raise ValueError(f"Participant with ID {participant_id} already exists.") + return v + + @field_validator("sessions", mode="before") + def check_unique_session_id(cls, v: Dict[str, Session], values: Any) -> Dict[str, Session]: # noqa: ANN401 + """Check if session IDs are unique.""" + sessions = values.get("sessions", {}) + for session_id, _ in v.items(): + if session_id in sessions: + raise ValueError(f"Session with ID {session_id} already exists.") + return v + + def add_participant(self, participant: Participant) -> None: + """Add a participant to the dataset.""" + if participant.id in self.participants: + raise ValueError(f"Participant with ID {participant.id} already exists.") + self.participants[participant.id] = participant + + def add_session(self, session: Session) -> None: + """Add a session to the dataset.""" + if session.id in self.sessions: + raise ValueError(f"Session with ID {session.id} already exists.") + self.sessions[session.id] = session + + def get_participants(self) -> List[Participant]: + """Get the list of participants in the dataset.""" + return list(self.participants.values()) + + def 
get_sessions(self) -> List[Session]: + """Get the list of sessions in the dataset.""" + return list(self.sessions.values()) diff --git a/src/senselab/utils/data_structures/video.py b/src/senselab/utils/data_structures/video.py new file mode 100644 index 00000000..f9a6d75e --- /dev/null +++ b/src/senselab/utils/data_structures/video.py @@ -0,0 +1,79 @@ +"""Data structures relevant for video tasks and pipelines.""" + +import uuid +from typing import Dict, Optional + +import torch +from pydantic import BaseModel, Field, ValidationInfo, field_validator +from torchvision.io import read_video + +from senselab.utils.constants import SENSELAB_NAMESPACE +from senselab.utils.data_structures.audio import Audio + + +class Video(BaseModel): + """Pydantic model for video and its corresponding metadata. + + Pydantic model for video that holds the necessary attributes, the actual decoded video data + and the frame rate, to work with videos in python. Contains metadata information as needed + and has a unique identifier for every video. + + Attributes: + frames: Represent the video as a Tensor of all of its frames, each of which is an image + that we represent through a Tensor of (C, H, W) + frame_rate: Also known as the frames per second (fps), defines the time component + of a video (often an integer but some use cases of float approximations) + audio: the audio associated with the Video (optional) + orig_path_or_id: Optional str for the original path or an ID to track file over time + metadata: Optional metadata dictionary of information associated with this Video instance + (e.g. 
participant demographics, video settings, location information) + """ + + frames: torch.Tensor + frame_rate: float + audio: Optional[Audio] + orig_path_or_id: Optional[str] = Field(default_factory=lambda: str(uuid.uuid4())) + metadata: Dict = Field(default={}) + model_config = {"arbitrary_types_allowed": True} + + @field_validator("frames", mode="before") + def check_frames(cls, v: torch.Tensor, _: ValidationInfo) -> torch.Tensor: + """Check that the frames are the correct Tensor shape of (T,C,H,W).""" + if len(v.shape) != 4: + raise ValueError( + "Expected frames to be of shape (T, C, H, W) where T is the number of frames, \ + C is the channels, and H and W are the height and width" + ) + return v + + @classmethod + def from_filepath(cls, filepath: str, metadata: Dict = {}) -> "Video": + """Creates a Video instance from a video file. + + Args: + filepath: Filepath of the video file to read from + metadata: Additional information associated with the video file + """ + v_frames, a_frames, v_metadata = read_video(filename=filepath, output_format="TCHW") + v_fps = v_metadata["video_fps"] + a_fps = v_metadata["audio_fps"] + v_audio = Audio(waveform=a_frames, sampling_rate=a_fps, orig_path_or_id=filepath) + + return cls(frames=v_frames, frame_rate=v_fps, audio=v_audio, orig_path_or_id=filepath, metadata=metadata) + + def id(self) -> str: + """Generate a unique identifier for the Video. + + Generate a unique identifier for the Video where equivalent video frames and frame rate + and audio generate the same IDs. 
+ + Returns: String UUID of the Video generated by an MD5 hash of the frames and the frame rate and audio + """ + temp_hash = uuid.uuid3(uuid.uuid3(SENSELAB_NAMESPACE, str(self.frames)), str(self.frame_rate)) + return str(temp_hash) if not self.audio else str(uuid.uuid3(temp_hash, self.audio.id())) + + def __eq__(self, other: object) -> bool: + """Overloads the default BaseModel equality to correctly check equivalence, ignoring metadata.""" + if isinstance(other, Audio): + return self.id() == other.id() + return False diff --git a/src/senselab/utils/device.py b/src/senselab/utils/device.py new file mode 100644 index 00000000..2143d933 --- /dev/null +++ b/src/senselab/utils/device.py @@ -0,0 +1,77 @@ +"""Utility functions for utilizing different devices in Senselab.""" + +from enum import Enum +from typing import Optional + +import torch + + +class DeviceType(Enum): + """Device types for PyTorch operations.""" + + CPU: str = "cpu" + CUDA: str = "cuda" + MPS: str = "mps" + + +DTYPE_MAP = {DeviceType.CPU: torch.float32, DeviceType.CUDA: torch.float16, DeviceType.MPS: torch.float32} + + +def _select_device_and_dtype( + user_preference: Optional[DeviceType] = None, + compatible_devices: list[DeviceType] = [ + DeviceType.CPU, + DeviceType.CUDA, + DeviceType.MPS, + ], +) -> tuple[DeviceType, torch.dtype]: + """Determines the device and data type for PyTorch operations. + + Allows users to give preferences for DeviceType, but determines based + on compatible and available devices. Chooses the fastest option if no + user preference is given. 
+ + Args: + user_preference: Optional DeviceType that the user wants to use + compatible_devices: DeviceTypes that work with the functionality of the method calling this + Returns: + Tuple of (DeviceType, torch.dtype) where the device is both available and compatible and the + dtype is the best performing dtype for that DeviceType + Raises: + ValueError: if the user specifies a preference that is not available or compatible and a safety + call if no devices are available or compatible (we believe this to be impossible to trigger). + """ + available_devices = [DeviceType.CPU] + if torch.cuda.is_available(): + available_devices.append(DeviceType.CUDA) + + if torch.backends.mps.is_available(): + available_devices.append(DeviceType.MPS) + + # Check compatible and available + useable_devices = [] + for device in available_devices: + if device in compatible_devices: + useable_devices.append(device) + + # User preference or fastest option + if user_preference: + if user_preference not in useable_devices: + raise ValueError( + "Requested user DeviceType is either not available or\ + compatible with this functionality." + ) + else: + return user_preference, DTYPE_MAP[user_preference] + else: + if DeviceType.CUDA in useable_devices: + return DeviceType.CUDA, DTYPE_MAP[DeviceType.CUDA] + elif DeviceType.MPS in useable_devices: + return DeviceType.MPS, DTYPE_MAP[DeviceType.MPS] + elif DeviceType.CPU in useable_devices: + return DeviceType.CPU, DTYPE_MAP[DeviceType.CPU] + else: + raise ValueError( + "Something went really wrong and no devices were available or \ + compatible." 
+ ) diff --git a/src/senselab/utils/functions.py b/src/senselab/utils/functions.py index 65798534..4267e107 100644 --- a/src/senselab/utils/functions.py +++ b/src/senselab/utils/functions.py @@ -1,41 +1,8 @@ """Utility functions for senselab.""" import os -from enum import Enum from typing import List -import torch - - -class DeviceType(Enum): - """Device types for PyTorch operations.""" - - CPU: str = "cpu" - CUDA: str = "cuda" - MPS: str = "mps" - - -def _select_device_and_dtype( - device_options: list[DeviceType] = [ - DeviceType.CPU, - DeviceType.CUDA, - DeviceType.MPS, - ], -) -> tuple[DeviceType, torch.dtype]: - """Determines the device and data type for PyTorch operations.""" - if torch.cuda.is_available() and DeviceType.CUDA in device_options: - device = DeviceType.CUDA - torch_dtype = torch.float16 # Using half precision for CUDA - elif torch.backends.mps.is_available() and DeviceType.MPS in device_options: - device = DeviceType.MPS - torch_dtype = torch.float32 - # Default to float32 on MPS for better precision - else: - device = DeviceType.CPU - torch_dtype = torch.float32 - # Default to float32 on CPU for better precision - return device, torch_dtype - def get_common_directory(files: List[str]) -> str: """A function to get the common directory from a list of file paths. diff --git a/src/senselab/utils/hf.py b/src/senselab/utils/hf.py index 468e0a02..636d7a88 100644 --- a/src/senselab/utils/hf.py +++ b/src/senselab/utils/hf.py @@ -16,27 +16,27 @@ class HFModel(BaseModel): @field_validator("hf_model_id") def validate_hf_model_id(cls, value: str) -> str: - """Validate the hf_model_id.""" + """Validate the hf_model_id. 
+ + # TODO: enabling using HF token + """ if not value: raise ValueError("hf_model_id cannot be empty") if not os.path.isfile(value) and not _check_hf_repo_exists( - value, "model", None + repo_id=value, revision="main", repo_type="model", token=None ): raise ValueError("hf_model_id is not a valid Hugging Face model") return value def _check_hf_repo_exists( - repo_id: str, repo_type: str, token: Optional[str] = None + repo_id: str, revision: str = "main", repo_type: str = "model", token: Optional[str] = None ) -> bool: """Private function to check if a Hugging Face repository exists.""" api = HfApi() try: - repo_refs = api.list_repo_refs( - repo_id=repo_id, repo_type=repo_type, token=token - ) - if repo_refs.branches: - return True - except Exception as e: - raise RuntimeError(f"An error occurred: {e}") - return False + api.list_repo_commits(repo_id=repo_id, revision=revision, repo_type=repo_type, token=token) + return True + except Exception: + # raise RuntimeError(f"An error occurred: {e}") + return False diff --git a/src/senselab/utils/tasks/cca_cka.py b/src/senselab/utils/tasks/cca_cka.py new file mode 100644 index 00000000..8e83b317 --- /dev/null +++ b/src/senselab/utils/tasks/cca_cka.py @@ -0,0 +1,133 @@ +"""This module is for computing CCA and CKA.""" + +from enum import Enum + +import torch + + +def compute_cca(features_x: torch.Tensor, features_y: torch.Tensor) -> float: + """Compute the mean squared CCA correlation (R^2_{CCA}). + + Args: + features_x (torch.Tensor): A num_examples x num_features matrix of features. + features_y (torch.Tensor): A num_examples x num_features matrix of features. + + Returns: + float: The mean squared CCA correlations between X and Y. 
+ """ + qx, _ = torch.linalg.qr(features_x) + qy, _ = torch.linalg.qr(features_y) + result = torch.norm(qx.t() @ qy) ** 2 / min(features_x.shape[1], features_y.shape[1]) + return result.item() if isinstance(result, torch.Tensor) else float(result) + + +class CKAKernelType(Enum): + """CKA kernel types.""" + + LINEAR = "linear" + RBF = "rbf" + + +def compute_cka( + features_x: torch.Tensor, + features_y: torch.Tensor, + kernel: CKAKernelType = CKAKernelType.LINEAR, + threshold: float = 1.0, +) -> float: + """Compute CKA between feature matrices. + + Args: + features_x (torch.Tensor): A num_examples x num_features matrix of features. + features_y (torch.Tensor): A num_examples x num_features matrix of features. + kernel (CKAKernelType): Type of kernel to use (CKAKernelType.LINEAR or CKAKernelType.RBF). + Default is CKAKernelType.LINEAR. + threshold (float): Fraction of median Euclidean distance to use as RBF kernel bandwidth + (used only if kernel is CKAKernelType.RBF). + + Returns: + float: The value of CKA between X and Y. + """ + + def _gram_linear(x: torch.Tensor) -> torch.Tensor: + """Compute Gram (kernel) matrix for a linear kernel. + + Args: + x (torch.Tensor): A num_examples x num_features matrix of features. + + Returns: + torch.Tensor: A num_examples x num_examples Gram matrix of examples. + """ + return x @ x.t() + + def _gram_rbf(x: torch.Tensor, threshold: float = 1.0) -> torch.Tensor: + """Compute Gram (kernel) matrix for an RBF kernel. + + Args: + x (torch.Tensor): A num_examples x num_features matrix of features. + threshold (float): Fraction of median Euclidean distance to use as RBF kernel bandwidth. + + Returns: + torch.Tensor: A num_examples x num_examples Gram matrix of examples. 
+ """ + dot_products = x @ x.t() + sq_norms = torch.diag(dot_products) + sq_distances = -2 * dot_products + sq_norms[:, None] + sq_norms[None, :] + sq_median_distance = torch.median(sq_distances) + return torch.exp(-sq_distances / (2 * threshold**2 * sq_median_distance)) + + def _center_gram(gram: torch.Tensor) -> torch.Tensor: + """Center a symmetric Gram matrix. + + This is equivalent to centering the (possibly infinite-dimensional) features + induced by the kernel before computing the Gram matrix. + + Args: + gram (torch.Tensor): A num_examples x num_examples symmetric matrix. + + Returns: + torch.Tensor: A symmetric matrix with centered columns and rows. + + Raises: + ValueError: If the input is not a symmetric matrix. + """ + if not torch.allclose(gram, gram.t()): + raise ValueError("Input must be a symmetric matrix.") + + n = gram.size(0) + unit = torch.ones(n, n, device=gram.device) + eye = torch.eye(n, device=gram.device) + unit = unit / n + haitch = eye - unit + centered_gram = haitch.mm(gram).mm(haitch) + return centered_gram + + def _cka(gram_x: torch.Tensor, gram_y: torch.Tensor) -> torch.Tensor: + """Compute CKA. + + Args: + gram_x (torch.Tensor): A num_examples x num_examples Gram matrix. + gram_y (torch.Tensor): A num_examples x num_examples Gram matrix. + + Returns: + float: The value of CKA between X and Y. + """ + gram_x = _center_gram(gram_x) + gram_y = _center_gram(gram_y) + + scaled_hsic = torch.sum(gram_x * gram_y) + + normalization_x = torch.norm(gram_x) + normalization_y = torch.norm(gram_y) + return scaled_hsic / (normalization_x * normalization_y) + + if kernel == CKAKernelType.LINEAR: + gram_x = _gram_linear(features_x) + gram_y = _gram_linear(features_y) + elif kernel == CKAKernelType.RBF: + gram_x = _gram_rbf(features_x, threshold) + gram_y = _gram_rbf(features_y, threshold) + else: + raise ValueError("Unsupported kernel type. 
Use CKAKernelType.LINEAR or CKAKernelType.RBF.")
+
+ result = _cka(gram_x, gram_y)
+ return result.item() if isinstance(result, torch.Tensor) else float(result)
diff --git a/src/senselab/utils/tasks/cca_cka_pydra.py b/src/senselab/utils/tasks/cca_cka_pydra.py
new file mode 100644
index 00000000..8ddbf997
--- /dev/null
+++ b/src/senselab/utils/tasks/cca_cka_pydra.py
@@ -0,0 +1,8 @@
+"""This module defines a pydra API for the CCA and CKA tasks."""
+
+import pydra
+
+from senselab.utils.tasks.cca_cka import compute_cca, compute_cka
+
+compute_cca_pt = pydra.mark.task(compute_cca)
+compute_cka_pt = pydra.mark.task(compute_cka)
diff --git a/src/senselab/utils/tasks/cosine_similarity.py b/src/senselab/utils/tasks/cosine_similarity.py
new file mode 100644
index 00000000..71a0508b
--- /dev/null
+++ b/src/senselab/utils/tasks/cosine_similarity.py
@@ -0,0 +1,43 @@
+"""This module provides the implementation of cosine similarity."""
+
+import torch
+
+
+def compute_cosine_similarity(tensor1: torch.Tensor, tensor2: torch.Tensor) -> float:
+ """Compute the cosine similarity between two torch tensors.
+
+ Args:
+ tensor1 (Tensor): The first input tensor.
+ tensor2 (Tensor): The second input tensor.
+
+ Returns:
+ float: The cosine similarity between the two input tensors.
+
+ Raises:
+ ValueError: If the input tensors are not of the same shape.
+
+ Examples:
+ >>> tensor1 = torch.tensor([1.0, 2.0, 3.0])
+ >>> tensor2 = torch.tensor([4.0, 5.0, 6.0])
+ >>> compute_cosine_similarity(tensor1, tensor2)
+ 0.9746318461970762
+
+ >>> tensor1 = torch.tensor([1.0, 0.0, -1.0])
+ >>> tensor2 = torch.tensor([-1.0, 0.0, 1.0])
+ >>> compute_cosine_similarity(tensor1, tensor2)
+ -1.0
+
+ Note:
+ This function assumes the input tensors are 1-dimensional and have the same shape.
+ """ + if tensor1.dim() != 1 or tensor2.dim() != 1: + raise ValueError("Input tensors must be 1-dimensional") + if tensor1.shape != tensor2.shape: + raise ValueError("Input tensors must have the same shape") + + dot_product = torch.dot(tensor1, tensor2) + norm_tensor1 = torch.norm(tensor1) + norm_tensor2 = torch.norm(tensor2) + + cosine_sim = dot_product / (norm_tensor1 * norm_tensor2) + return cosine_sim.item() diff --git a/src/senselab/utils/tasks/cosine_similarity_pydra.py b/src/senselab/utils/tasks/cosine_similarity_pydra.py new file mode 100644 index 00000000..4a3e7dd3 --- /dev/null +++ b/src/senselab/utils/tasks/cosine_similarity_pydra.py @@ -0,0 +1,7 @@ +"""This module defines a pydra API for computing cosine similarity.""" + +import pydra + +from senselab.utils.tasks.cosine_similarity import compute_cosine_similarity + +cosine_similarity_pt = pydra.mark.task(compute_cosine_similarity) diff --git a/src/senselab/utils/tasks/cross_correlation.py b/src/senselab/utils/tasks/cross_correlation.py new file mode 100644 index 00000000..d88bb32d --- /dev/null +++ b/src/senselab/utils/tasks/cross_correlation.py @@ -0,0 +1,51 @@ +"""This module contains functions for computing the normalized cross-correlation between two signals.""" + +import numpy as np +import torch +from scipy.signal import correlate + + +def compute_normalized_cross_correlation(signal1: torch.Tensor, signal2: torch.Tensor) -> torch.Tensor: + """Calculate the normalized cross-correlation between two signals. + + Args: + signal1 (torch.Tensor): The first input signal as a PyTorch tensor. + signal2 (torch.Tensor): The second input signal as a PyTorch tensor. + + Returns: + torch.Tensor: The normalized cross-correlation value between the two input signals. 
+
+ Examples:
+ >>> signal1 = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0])
+ >>> signal2 = torch.tensor([2.0, 3.0, 4.0])
+ >>> compute_normalized_cross_correlation(signal1, signal2)
+ Tensor([0.30151134, 0.51298918, 0.77459667, 0.9486833 , 0.90453403, 0.70710678, 0.43643578])
+
+ Note:
+ This function assumes the input signals are one-dimensional
+ and contain sufficient elements for meaningful cross-correlation.
+ """
+ # Ensure the inputs are 1D tensors
+ if signal1.ndim != 1 or signal2.ndim != 1:
+ raise ValueError("Input signals must be one-dimensional")
+
+ # Convert PyTorch tensors to NumPy arrays
+ signal1 = signal1.numpy()
+ signal2 = signal2.numpy()
+
+ # Calculate the energy of each signal
+ energy_signal1 = np.sum(signal1**2)
+ energy_signal2 = np.sum(signal2**2)
+
+ # Check for zero energy to avoid division by zero
+ if energy_signal1 == 0 or energy_signal2 == 0:
+ raise ZeroDivisionError("One of the input signals has zero energy, causing division by zero in normalization")
+
+ # Compute the cross-correlation
+ cross_correlation = correlate(signal1, signal2)
+
+ # Calculate the normalized cross-correlation
+ normalized_cross_correlation = cross_correlation / np.sqrt(energy_signal1 * energy_signal2)
+
+ print(normalized_cross_correlation)
+ return torch.Tensor(normalized_cross_correlation)
diff --git a/src/senselab/utils/tasks/cross_correlation_pydra.py b/src/senselab/utils/tasks/cross_correlation_pydra.py
new file mode 100644
index 00000000..b42ff20c
--- /dev/null
+++ b/src/senselab/utils/tasks/cross_correlation_pydra.py
@@ -0,0 +1,7 @@
+"""This module defines a pydra API for computing cross correlation between two signals."""
+
+import pydra
+
+from senselab.utils.tasks.cross_correlation import compute_normalized_cross_correlation
+
+compute_normalized_cross_correlation_pt = pydra.mark.task(compute_normalized_cross_correlation)
diff --git a/src/senselab/utils/tasks/eer.py b/src/senselab/utils/tasks/eer.py
new file mode 100644
index 00000000..59b12479
--- /dev/null
+++ b/src/senselab/utils/tasks/eer.py @@ -0,0 +1,19 @@ +"""This module implements some utilities for computing the Equal Error Rate (EER).""" + +from typing import Tuple + +import torch +from speechbrain.utils.metric_stats import EER + + +def compute_eer(predictions: torch.Tensor, targets: torch.Tensor) -> Tuple[float, float]: + """Compute the Equal Error Rate (EER). + + Args: + predictions (torch.Tensor): A 1D tensor of predictions. + targets (torch.Tensor): A 1D tensor of targets. + + Returns: + Tuple[float, float]: The EER and the threshold for the EER. + """ + return EER(predictions, targets) diff --git a/src/senselab/utils/tasks/eer_pydra.py b/src/senselab/utils/tasks/eer_pydra.py new file mode 100644 index 00000000..6c9ab83d --- /dev/null +++ b/src/senselab/utils/tasks/eer_pydra.py @@ -0,0 +1,7 @@ +"""This module defines a pydra API for computing EER.""" + +import pydra + +from senselab.utils.tasks.eer import compute_eer + +compute_eer_pt = pydra.mark.task(compute_eer) diff --git a/src/senselab/utils/tasks/input_output.py b/src/senselab/utils/tasks/input_output.py index 892ca655..850b9b2e 100644 --- a/src/senselab/utils/tasks/input_output.py +++ b/src/senselab/utils/tasks/input_output.py @@ -34,9 +34,7 @@ def _from_files_to_dataset(files: List[File]) -> Dataset: return _from_hf_dataset_to_dict(dataset) -def read_dataset_from_disk( - input_path: str, split: str, streaming: bool = False -) -> Dict[str, Any]: +def read_dataset_from_disk(input_path: str, split: str, streaming: bool = False) -> Dict[str, Any]: """Loads a Hugging Face `Dataset` object from disk. It determines the format based on the file extension or directory. 
@@ -58,9 +56,7 @@ def read_dataset_from_disk( return _from_hf_dataset_to_dict(dataset) except Exception as e: # Generic error handling, e.g., network issues, data loading issues - raise RuntimeError( - f"An error occurred while loading the dataset: {str(e)}" - ) + raise RuntimeError(f"An error occurred while loading the dataset: {str(e)}") def read_dataset_from_hub( @@ -73,10 +69,9 @@ def read_dataset_from_hub( It includes support for private repositories. """ - if not _check_hf_repo_exists(remote_repository, "dataset", hf_token): + if not _check_hf_repo_exists(remote_repository, "main", "dataset", hf_token): raise RuntimeError( - f"The repository {remote_repository} - {revision} - {split}" - " does not exist or could not be accessed." + f"The repository {remote_repository} - {revision} - {split}" " does not exist or could not be accessed." ) # Load the dataset @@ -89,9 +84,7 @@ def read_dataset_from_hub( ) except Exception as e: # Generic error handling, e.g., network issues, data loading issues - raise RuntimeError( - f"An error occurred while loading the dataset: {str(e)}" - ) + raise RuntimeError(f"An error occurred while loading the dataset: {str(e)}") return _from_hf_dataset_to_dict(dataset) @@ -117,9 +110,7 @@ def push_dataset_to_hub( token=hf_token, ) else: - hf_dataset.push_to_hub( - repo_id=remote_repository, revision=revision, split=split - ) + hf_dataset.push_to_hub(repo_id=remote_repository, revision=revision, split=split) except Exception as e: raise RuntimeError(f"Failed to push dataset to the hub: {str(e)}") return @@ -140,27 +131,21 @@ def save_dataset_to_disk( output_path = os.path.join(output_directory, output_name) # No extension for Arrow, it's a directory else: - output_path = os.path.join( - output_directory, f"{output_name}.{output_format}" - ) + output_path = os.path.join(output_directory, f"{output_name}.{output_format}") # Create the output directory, ignore error if it already exists os.makedirs(output_directory, exist_ok=True) if 
output_format == "parquet": - def _save_hf_dataset_as_parquet( - dataset: Dataset, output_path: str - ) -> None: + def _save_hf_dataset_as_parquet(dataset: Dataset, output_path: str) -> None: """Saves a Hugging Face `Dataset` object to parquet format.""" dataset.to_parquet(output_path) _save_hf_dataset_as_parquet(hf_dataset, output_path) elif output_format == "json": - def _save_hf_dataset_as_json( - dataset: Dataset, output_path: str - ) -> None: + def _save_hf_dataset_as_json(dataset: Dataset, output_path: str) -> None: """Saves a Hugging Face `Dataset` object to json format.""" dataset.to_json(output_path) @@ -181,9 +166,7 @@ def _save_hf_dataset_as_sql(dataset: Dataset, output_path: str) -> None: _save_hf_dataset_as_sql(hf_dataset, output_path) elif output_format == "arrow": - def _save_hf_dataset_as_arrow( - dataset: Dataset, output_path: str - ) -> None: + def _save_hf_dataset_as_arrow(dataset: Dataset, output_path: str) -> None: """Saves a Hugging Face `Dataset` object in Apache Arrow format.""" dataset.save_to_disk(output_path) diff --git a/src/tests/audio/tasks/data_augmentation_test.py b/src/tests/audio/tasks/data_augmentation_test.py new file mode 100644 index 00000000..07ef3df2 --- /dev/null +++ b/src/tests/audio/tasks/data_augmentation_test.py @@ -0,0 +1,39 @@ +"""Module for testing data augmentation on audios.""" + +import torch +from torch_audiomentations import Compose, PolarityInversion + +from senselab.audio.tasks.data_augmentation import augment_audios +from senselab.utils.data_structures.audio import Audio +from senselab.utils.data_structures.dataset import SenselabDataset + + +def test_audio_data_augmentation() -> None: + """Test data augmentations using the new Audio data types.""" + apply_augmentation = Compose(transforms=[PolarityInversion(p=1, output_type="dict")], output_type="dict") + + audio_paths = [ + "src/tests/data_for_testing/audio_48khz_mono_16bits.wav", + "src/tests/data_for_testing/audio_48khz_stereo_16bits.wav", + ] + 
audio_dataset_from_paths = SenselabDataset(audios=audio_paths) + mono_audio, stereo_audio = audio_dataset_from_paths.create_audio_split_for_pydra_task() + mono_inverted = augment_audios(mono_audio, apply_augmentation) + stereo_inverted = augment_audios(stereo_audio, apply_augmentation) + assert torch.equal( + mono_audio[0].waveform, -1 * mono_inverted[0].waveform + ), "Audio should have been inverted by the augmentation" + assert torch.equal( + stereo_audio[0].waveform, -1 * stereo_inverted[0].waveform + ), "Audio should have been inverted by the augmentation and not affected by stereo audio" + + batched_audio = SenselabDataset( + audios=[ + Audio(waveform=stereo_audio[0].waveform[0], sampling_rate=stereo_audio[0].sampling_rate), + Audio(waveform=stereo_audio[0].waveform[1], sampling_rate=stereo_audio[0].sampling_rate), + ] + ).create_audio_split_for_pydra_task(2) + batch_inverted = augment_audios(batched_audio[0], apply_augmentation) + assert torch.equal(batched_audio[0][0].waveform, -1 * batch_inverted[0].waveform) and torch.equal( + batched_audio[0][1].waveform, -1 * batch_inverted[1].waveform + ) diff --git a/src/tests/audio/tasks/preprocessing_test.py b/src/tests/audio/tasks/preprocessing_test.py new file mode 100644 index 00000000..e7c09c35 --- /dev/null +++ b/src/tests/audio/tasks/preprocessing_test.py @@ -0,0 +1,115 @@ +"""Module for testing the preprocessing functionality of Audios.""" + +import math + +import pytest +import torch + +from senselab.audio.tasks.preprocessing import ( + chunk_audios, + downmix_audios_to_mono, + resample_audios, + select_channel_from_audios, +) +from senselab.utils.data_structures.audio import Audio + + +def test_resample_audios() -> None: + """Tests functionality for resampling Audio objects.""" + resample_rate = 36000 + mono_audio = Audio.from_filepath(("src/tests/data_for_testing/audio_48khz_mono_16bits.wav")) + resampled_expected_size = mono_audio.waveform.shape[1] / 48000 * resample_rate + + resampled_audio = 
resample_audios([mono_audio], resample_rate) + assert math.ceil(resampled_expected_size) == resampled_audio[0].waveform.shape[1] + + stereo_audio = Audio.from_filepath(("src/tests/data_for_testing/audio_48khz_stereo_16bits.wav")) + resampled_expected_size = stereo_audio.waveform.shape[1] / 48000 * resample_rate + + resampled_audio = resample_audios([stereo_audio], resample_rate) + assert math.ceil(resampled_expected_size) == resampled_audio[0].waveform.shape[1] + + +def test_downmix_audios() -> None: + """Tests functionality for downmixing Audio objects.""" + mono_audio = Audio.from_filepath(("src/tests/data_for_testing/audio_48khz_mono_16bits.wav")) + down_mixed_audios = downmix_audios_to_mono([mono_audio]) + assert down_mixed_audios[0].waveform.dim() == 2, "Mono audio should maintain the (num_channels, num_samples) shape" + assert down_mixed_audios[0].waveform.shape[0] == 1, "Mono audio should remain mono after downmixing" + assert down_mixed_audios[0].waveform.size(1) == mono_audio.waveform.size( + 1 + ), "Downmixed mono audio should have correct number of samples" + + stereo_audio = Audio.from_filepath("src/tests/data_for_testing/audio_48khz_stereo_16bits.wav") + down_mixed_audios = downmix_audios_to_mono([stereo_audio]) + assert down_mixed_audios[0].waveform.dim() == 2, "Mono audio should maintain the (num_channels, num_samples) shape" + assert down_mixed_audios[0].waveform.shape[0] == 1, "Stereo audio should become mono after downmixing" + assert down_mixed_audios[0].waveform.size(1) == stereo_audio.waveform.size( + 1 + ), "Downmixed stereo audio should have correct number of samples" + assert torch.isclose( + down_mixed_audios[0].waveform, stereo_audio.waveform.mean(dim=0, keepdim=True) + ).all(), "Downmixed audio should be the mean of the stereo channels" + + +def test_select_channel_from_audios() -> None: + """Tests functionality for selecting a specific channel from Audio objects.""" + + def check_selected_channel(audio: Audio, channel_to_select: int) -> 
None: + """Checks if the original selected audio channel is the same as the returned selected audio channel.""" + selected_channel_audios = select_channel_from_audios([audio], channel_to_select) + assert selected_channel_audios[0].waveform.shape[0] == 1, "Selected channel audio should be mono" + assert ( + selected_channel_audios[0].waveform.shape[1] == audio.waveform.shape[1] + ), "Selected channel audio should have the correct number of samples" + assert torch.equal( + selected_channel_audios[0].waveform[0, :], audio.waveform[channel_to_select, :] + ), "Selected channel audio should be the same as the selected channel of the original audio" + + channel_to_select = 0 + mono_audio = Audio.from_filepath("src/tests/data_for_testing/audio_48khz_mono_16bits.wav") + check_selected_channel(mono_audio, channel_to_select) + + stereo_audio = Audio.from_filepath("src/tests/data_for_testing/audio_48khz_stereo_16bits.wav") + check_selected_channel(stereo_audio, channel_to_select) + + channel_to_select = 1 + check_selected_channel(stereo_audio, channel_to_select) + + +def test_chunk_audios() -> None: + """Tests functionality for chunking Audio objects.""" + # Test data setup + audio_path = "src/tests/data_for_testing/audio_48khz_mono_16bits.wav" + audio = Audio.from_filepath(audio_path) + audio_duration = audio.waveform.shape[1] / audio.sampling_rate + + # Test cases + test_data = [ + (audio, (0.0, 1.0)), # Normal case within bounds + (audio, (1.0, 2.0)), # Normal case within bounds + ] + + chunked_audios = chunk_audios(test_data) + + # Verify chunked audio lengths + for i, (original_audio, (start, end)) in enumerate(test_data): + start_sample = int(start * original_audio.sampling_rate) + end_sample = int(end * original_audio.sampling_rate) + expected_length = end_sample - start_sample + assert chunked_audios[i].waveform.shape[1] == expected_length + # Test case where start time is negative + with pytest.raises(ValueError, match="Start time must be greater than or equal to 
0."): + chunk_audios([(audio, (-1.0, 1.0))]) + + # Test case where end time exceeds duration + try: + chunk_audios([(audio, (0.0, audio_duration + 1.0))]) + except ValueError as e: + assert str(e) == f"End time must be less than the duration of the audio file ({audio_duration} seconds)." + else: + pytest.fail("ValueError not raised") + + # Test case where end time equals duration + chunked_audio = chunk_audios([(audio, (0.0, audio_duration))])[0] + assert chunked_audio.waveform.shape[1] == audio.waveform.shape[1] diff --git a/src/tests/audio/tasks/speech_to_text_evaluation_test.py b/src/tests/audio/tasks/speech_to_text_evaluation_test.py new file mode 100644 index 00000000..f58027ad --- /dev/null +++ b/src/tests/audio/tasks/speech_to_text_evaluation_test.py @@ -0,0 +1,64 @@ +"""Module for testing speech-to-text evaluation.""" + +from senselab.audio.tasks.speech_to_text_evaluation import ( + calculate_cer, + calculate_mer, + calculate_wer, + calculate_wil, + calculate_wip, +) + + +def test_calculate_wer() -> None: + """Tests the calculation of Word Error Rate (WER).""" + reference = "hello world" + hypothesis = "hello duck" + expected_wer = 0.5 + + wer = calculate_wer(reference, hypothesis) + + assert wer == expected_wer, f"Expected WER: {expected_wer}, but got: {wer}" + + +def test_calculate_mer() -> None: + """Tests the calculation of Match Error Rate (MER).""" + reference = "hello world" + hypothesis = "hello duck" + expected_mer = 0.5 + + mer = calculate_mer(reference, hypothesis) + + assert mer == expected_mer, f"Expected MER: {expected_mer}, but got: {mer}" + + +def test_calculate_wil() -> None: + """Tests the calculation of Word Information Lost (WIL).""" + reference = "hello world" + hypothesis = "hello duck" + expected_wil = 0.75 + + wil = calculate_wil(reference, hypothesis) + + assert wil == expected_wil, f"Expected WIL: {expected_wil}, but got: {wil}" + + +def test_calculate_wip() -> None: + """Tests the calculation of Word Information Preserved 
(WIP).""" + reference = "hello world" + hypothesis = "hello duck" + expected_wip = 0.25 + + wip = calculate_wip(reference, hypothesis) + + assert wip == expected_wip, f"Expected WIP: {expected_wip}, but got: {wip}" + + +def test_calculate_cer() -> None: + """Tests the calculation of Character Error Rate (CER).""" + reference = "hello world" + hypothesis = "hello duck" + expected_cer = 0.45454545454545453 + + cer = calculate_cer(reference, hypothesis) + + assert cer == expected_cer, f"Expected CER: {expected_cer}, but got: {cer}" diff --git a/src/tests/utils/data_structures/audio_test.py b/src/tests/utils/data_structures/audio_test.py new file mode 100644 index 00000000..b375202f --- /dev/null +++ b/src/tests/utils/data_structures/audio_test.py @@ -0,0 +1,45 @@ +"""Module for testing Audio data structures.""" + +import torch +import torchaudio + +from senselab.utils.data_structures.audio import Audio + + +def test_audio_creation() -> None: + """Tests the functionality for creating data instances.""" + mono_audio_data, mono_sr = torchaudio.load("src/tests/data_for_testing/audio_48khz_mono_16bits.wav") + stereo_audio_data, stereo_sr = torchaudio.load("src/tests/data_for_testing/audio_48khz_stereo_16bits.wav") + + mono_audio = Audio( + waveform=mono_audio_data, + sampling_rate=mono_sr, + orig_path_or_id="src/tests/data_for_testing/audio_48khz_mono_16bits.wav", + ) + mono_audio_from_file = Audio.from_filepath("src/tests/data_for_testing/audio_48khz_mono_16bits.wav") + assert mono_audio == mono_audio_from_file, "Mono audios are not exactly equivalent" + + stereo_audio = Audio( + waveform=stereo_audio_data, + sampling_rate=stereo_sr, + orig_path_or_id="src/tests/data_for_testing/audio_48khz_stereo_16bits.wav", + ) + stereo_audio_uuid = Audio(waveform=stereo_audio_data, sampling_rate=stereo_sr) + stereo_audio_from_file = Audio.from_filepath("src/tests/data_for_testing/audio_48khz_stereo_16bits.wav") + assert stereo_audio == stereo_audio_from_file, "Stereo audios are not 
exactly equivalent" + assert stereo_audio == stereo_audio_uuid, "Stereo audio with different IDs should still be equivalent" + + audio_single_tensor = Audio(waveform=mono_audio_data[0], sampling_rate=mono_sr) + assert torch.equal( + mono_audio.waveform, audio_single_tensor.waveform + ), "Mono audios of tensor shape (num_samples,) should be reshaped to (1, num_samples)" + + audio_from_list = Audio(waveform=list(mono_audio_data[0]), sampling_rate=mono_sr) + audio_from_list_of_lists = Audio(waveform=[list(mono_audio_data[0])], sampling_rate=mono_sr) + audio_from_numpy = Audio(waveform=mono_audio_data.numpy(), sampling_rate=mono_sr) + + assert torch.equal(mono_audio.waveform, audio_from_list.waveform), "List audio should've been converted to Tensor" + assert torch.equal( + mono_audio.waveform, audio_from_list_of_lists.waveform + ), "List of lists audio should've been converted to Tensor" + assert torch.equal(mono_audio.waveform, audio_from_numpy.waveform), "NumPy audio should've been converted to Tensor" diff --git a/src/tests/utils/data_structures/dataset_test.py b/src/tests/utils/data_structures/dataset_test.py new file mode 100644 index 00000000..b9c06d29 --- /dev/null +++ b/src/tests/utils/data_structures/dataset_test.py @@ -0,0 +1,153 @@ +"""Module for testing the Participant, Session, and SenselabDataset classes.""" + +import pytest +import torchaudio + +from senselab.utils.data_structures.audio import Audio +from senselab.utils.data_structures.dataset import Participant, SenselabDataset, Session + + +def test_create_participant() -> None: + """Test creating a participant.""" + participant = Participant(metadata={"name": "John Doe"}) + assert isinstance(participant, Participant) + assert participant.metadata["name"] == "John Doe" + + +def test_create_session() -> None: + """Test creating a session.""" + session = Session(metadata={"description": "Initial session"}) + assert isinstance(session, Session) + assert session.metadata["description"] == "Initial session" 
def test_add_participant() -> None:
    """Test adding a participant to the dataset."""
    dataset = SenselabDataset()
    participant = Participant()
    dataset.add_participant(participant)
    # add_participant must register the participant under its id.
    assert participant.id in dataset.participants


def test_add_duplicate_participant() -> None:
    """Test adding a duplicate participant to the dataset."""
    dataset = SenselabDataset()
    participant = Participant()
    dataset.add_participant(participant)
    # Re-adding the same participant must be rejected with ValueError.
    with pytest.raises(ValueError):
        dataset.add_participant(participant)


def test_add_session() -> None:
    """Test adding a session to the dataset."""
    dataset = SenselabDataset()
    session = Session()
    dataset.add_session(session)
    assert session.id in dataset.sessions


def test_add_duplicate_session() -> None:
    """Test adding a duplicate session to the dataset."""
    dataset = SenselabDataset()
    session = Session()
    dataset.add_session(session)
    # Re-adding the same session must be rejected with ValueError.
    with pytest.raises(ValueError):
        dataset.add_session(session)


def test_get_participants() -> None:
    """Test getting the list of participants."""
    dataset = SenselabDataset()
    participant1 = Participant()
    participant2 = Participant()
    dataset.add_participant(participant1)
    dataset.add_participant(participant2)
    participants = dataset.get_participants()
    assert len(participants) == 2
    assert participant1 in participants
    assert participant2 in participants


def test_get_sessions() -> None:
    """Test getting the list of sessions."""
    dataset = SenselabDataset()
    session1 = Session()
    session2 = Session()
    dataset.add_session(session1)
    dataset.add_session(session2)
    sessions = dataset.get_sessions()
    assert len(sessions) == 2
    assert session1 in sessions
    assert session2 in sessions


def test_audio_dataset_creation() -> None:
    """Tests the creation of AudioDatasets with various ways of generating them."""
    audio_paths = [
        "src/tests/data_for_testing/audio_48khz_mono_16bits.wav",
        "src/tests/data_for_testing/audio_48khz_stereo_16bits.wav",
    ]

    mono_audio_data, mono_sr = torchaudio.load("src/tests/data_for_testing/audio_48khz_mono_16bits.wav")
    stereo_audio_data, stereo_sr = torchaudio.load("src/tests/data_for_testing/audio_48khz_stereo_16bits.wav")
    mono_audio = Audio(
        waveform=mono_audio_data,
        sampling_rate=mono_sr,
        orig_path_or_id="src/tests/data_for_testing/audio_48khz_mono_16bits.wav",
    )
    stereo_audio = Audio(
        waveform=stereo_audio_data,
        sampling_rate=stereo_sr,
        orig_path_or_id="src/tests/data_for_testing/audio_48khz_stereo_16bits.wav",
    )

    # Dataset built from file paths must equal one built from pre-loaded Audios.
    audio_dataset_from_paths = SenselabDataset(audios=audio_paths)
    assert (
        audio_dataset_from_paths.audios[0] == mono_audio and audio_dataset_from_paths.audios[1] == stereo_audio
    ), "Audio data generated from paths does not equal creating them individually"  # FIX: message was garbled ("creating the individually")

    audio_dataset_from_data = SenselabDataset(
        audios=[
            Audio(waveform=mono_audio_data, sampling_rate=mono_sr),
            Audio(waveform=stereo_audio_data, sampling_rate=stereo_sr),
        ],
    )

    assert audio_dataset_from_paths == audio_dataset_from_data, "Audio datasets should be equivalent"


def test_audio_dataset_splits() -> None:
    """Tests the AudioDataset split functionality."""
    audio_paths = [
        "src/tests/data_for_testing/audio_48khz_mono_16bits.wav",
        "src/tests/data_for_testing/audio_48khz_stereo_16bits.wav",
    ]
    audio_dataset = SenselabDataset(audios=audio_paths)
    mono_audio_data, mono_sr = torchaudio.load("src/tests/data_for_testing/audio_48khz_mono_16bits.wav")
    stereo_audio_data, stereo_sr = torchaudio.load("src/tests/data_for_testing/audio_48khz_stereo_16bits.wav")
    mono_audio = Audio(
        waveform=mono_audio_data,
        sampling_rate=mono_sr,
        orig_path_or_id="src/tests/data_for_testing/audio_48khz_mono_16bits.wav",
    )
    stereo_audio = Audio(
        waveform=stereo_audio_data,
        sampling_rate=stereo_sr,
        orig_path_or_id="src/tests/data_for_testing/audio_48khz_stereo_16bits.wav",
    )

    # No argument: one single-audio sublist per audio (CPU-style split).
    no_param_cpu_split = audio_dataset.create_audio_split_for_pydra_task()
    assert no_param_cpu_split == [
        [mono_audio],
        [stereo_audio],
    ], "Default split should have been a list of each audio in its own list"

    # Batch size equal to dataset size: one sublist holding everything.
    gpu_split_exact = audio_dataset.create_audio_split_for_pydra_task(2)
    assert gpu_split_exact == [
        [mono_audio, stereo_audio]
    ], "Exact GPU split should generate a list with one list of all of the audios"

    # Batch size larger than dataset: same single sublist, no padding.
    gpu_excess_split = audio_dataset.create_audio_split_for_pydra_task(4)
    assert gpu_excess_split == [
        [mono_audio, stereo_audio]
    ], "Excess GPU split should generate a list with one list of all of the audios, unpadded"


# --- src/tests/utils/tasks/cca_cka_test.py ---

"""Module for testing the CCA and CKA functions."""

import torch

from senselab.utils.tasks.cca_cka import CKAKernelType, compute_cca, compute_cka


def test_compute_cca() -> None:
    """Test compute_cca function with random input tensors."""
    # Create input tensors
    features_x = torch.tensor([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
    features_y = torch.tensor([[2.0, 4.0], [6.0, 8.0], [10.0, 12.0]])
    expected = 1.0  # Since features_y is a linear transformation of features_x, CCA should be perfect.

    # Call the compute_cca function
    cca_value = compute_cca(features_x, features_y)

    # Assert that the result is a float
    assert isinstance(cca_value, float), "Output should be a float."

    assert torch.isclose(torch.tensor(cca_value), torch.tensor(expected), atol=1e-6)


def test_compute_cka_linear() -> None:
    """Test compute_cka function with linear kernel and random input tensors."""
    # Create input tensors
    features_x = torch.tensor([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
    features_y = torch.tensor([[2.0, 4.0], [6.0, 8.0], [10.0, 12.0]])
    expected = 1.0  # Since features_y is a linear transformation of features_x, linear CKA should be perfect.

    # Call the compute_cka function with linear kernel
    cka_value = compute_cka(features_x, features_y, kernel=CKAKernelType.LINEAR)

    # Assert that the result is a float
    assert isinstance(cka_value, float), "Output should be a float."

    assert torch.isclose(torch.tensor(cka_value), torch.tensor(expected), atol=1e-6)


def test_compute_cka_rbf() -> None:
    """Test compute_cka function with RBF kernel and random input tensors."""
    # Create input tensors
    features_x = torch.tensor([[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
    features_y = torch.tensor([[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
    expected = 1.0  # Since features_y is the same as features_x, RBF CKA should be perfect.

    # Call the compute_cka function with rbf kernel
    cka_value = compute_cka(features_x, features_y, kernel=CKAKernelType.RBF)

    # Assert that the result is a float
    assert isinstance(cka_value, float), "Output should be a float."

    assert torch.isclose(torch.tensor(cka_value), torch.tensor(expected), atol=1e-6)


# --- src/tests/utils/tasks/cosine_similarity_test.py ---

"""This module contains unit tests for the cosine similarity function."""

import pytest
import torch

from senselab.utils.tasks.cosine_similarity import compute_cosine_similarity


def test_cosine_similarity_identical_vectors() -> None:
    """Test cosine similarity for identical vectors."""
    tensor1 = torch.tensor([1.0, 2.0, 3.0])
    tensor2 = torch.tensor([1.0, 2.0, 3.0])
    similarity = compute_cosine_similarity(tensor1, tensor2)
    assert torch.isclose(torch.tensor(similarity), torch.tensor(1.0), atol=1e-6)


def test_cosine_similarity_opposite_vectors() -> None:
    """Test cosine similarity for opposite vectors."""
    tensor1 = torch.tensor([1.0, 0.0, -1.0])
    tensor2 = torch.tensor([-1.0, 0.0, 1.0])
    similarity = compute_cosine_similarity(tensor1, tensor2)
    assert torch.isclose(torch.tensor(similarity), torch.tensor(-1.0), atol=1e-6)


def test_cosine_similarity_orthogonal_vectors() -> None:
    """Test cosine similarity for orthogonal vectors."""
    tensor1 = torch.tensor([1.0, 0.0])
    tensor2 = torch.tensor([0.0, 1.0])
    similarity = compute_cosine_similarity(tensor1, tensor2)
    assert torch.isclose(torch.tensor(similarity), torch.tensor(0.0), atol=1e-6)


def test_cosine_similarity_non_identical_vectors() -> None:
    """Test cosine similarity for non-identical but non-orthogonal vectors."""
    tensor1 = torch.tensor([1.0, 2.0, 3.0])
    tensor2 = torch.tensor([4.0, 5.0, 6.0])
    expected_value = 0.9746318461970762  # precomputed cos-sim of these two vectors
    similarity = compute_cosine_similarity(tensor1, tensor2)
    assert torch.isclose(torch.tensor(similarity), torch.tensor(expected_value), atol=1e-6)


def test_cosine_similarity_different_shapes() -> None:
    """Test cosine similarity for tensors of different shapes, expecting a ValueError."""
    tensor1 = torch.tensor([1.0, 2.0])
    tensor2 = torch.tensor([1.0, 2.0, 3.0])
    with pytest.raises(ValueError):
        compute_cosine_similarity(tensor1, tensor2)


# --- src/tests/utils/tasks/cross_correlation_test.py ---

"""Module for testing the compute_normalized_cross_correlation function."""

import pytest
import torch

from senselab.utils.tasks.cross_correlation import compute_normalized_cross_correlation


def test_normalized_cross_correlation_basic() -> None:
    """Test normalized cross-correlation for basic identical signals."""
    signal1 = torch.tensor([1.0, 1.0])
    signal2 = torch.tensor([1.0, 1.0])
    expected_result = torch.tensor([0.5, 1.0, 0.5], dtype=torch.float32)
    result = compute_normalized_cross_correlation(signal1, signal2)
    assert torch.allclose(result, expected_result, atol=1e-4), f"Expected {expected_result}, but got {result}"


def test_normalized_cross_correlation_different_lengths() -> None:
    """Test normalized cross-correlation for signals of different lengths."""
    signal1 = torch.tensor([1.0, 2.0, 1.0])
    signal2 = torch.tensor([1.0, 2.0])
    expected_result = torch.tensor([0.3651, 0.9129, 0.7303, 0.1826], dtype=torch.float32)
    result = compute_normalized_cross_correlation(signal1, signal2)
    assert torch.allclose(result, expected_result, atol=1e-4), f"Expected {expected_result}, but got {result}"


def test_normalized_cross_correlation_zero_signal() -> None:
    """Test normalized cross-correlation with a zero signal."""
    # An all-zero signal has zero norm, so normalization divides by zero.
    signal1 = torch.tensor([0.0, 0.0, 0.0, 0.0])
    signal2 = torch.tensor([1.0, 2.0, 3.0])
    with pytest.raises(ZeroDivisionError):
        compute_normalized_cross_correlation(signal1, signal2)


def test_normalized_cross_correlation_empty_signal() -> None:
    """Test normalized cross-correlation with an empty signal."""
    signal1 = torch.tensor([])
    signal2 = torch.tensor([1.0, 2.0, 3.0])
    with pytest.raises(ZeroDivisionError):
        compute_normalized_cross_correlation(signal1, signal2)


def test_normalized_cross_correlation_non_1d_signal() -> None:
    """Test normalized cross-correlation with non-1D signals."""
    # 2-D input is rejected regardless of which argument it appears in.
    signal1 = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
    signal2 = torch.tensor([1.0, 2.0, 3.0])
    with pytest.raises(ValueError):
        compute_normalized_cross_correlation(signal1, signal2)

    signal1 = torch.tensor([1.0, 2.0, 3.0])
    signal2 = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
    with pytest.raises(ValueError):
        compute_normalized_cross_correlation(signal1, signal2)


# --- src/tests/utils/tasks/eer_test.py ---

"""This module contains unit tests for the EER function."""

import torch

from senselab.utils.tasks.eer import compute_eer


def test_compute_eer() -> None:
    """Test that the EER is computed correctly for perfectly separable data."""
    # NOTE(review): targets here are continuous values, not 0/1 labels —
    # confirm compute_eer's expected target format against its docstring.
    predictions = torch.tensor([0.6, 0.7, 0.8, 0.5])
    targets = torch.tensor([0.4, 0.3, 0.2, 0.1])
    eer, threshold = compute_eer(predictions, targets)
    # Since we expect perfect separation, the EER should be 0
    assert eer == 0.0, "EER should be 0 for perfectly separable data"
    assert 0 <= threshold <= 1, "Threshold should be between 0 and 1"


def test_compute_eer_random() -> None:
    """Test that the EER is computed correctly for random predictions and targets."""
    # Set random seed for reproducibility
    torch.manual_seed(42)
    predictions = torch.rand(100)
    targets = torch.randint(0, 2, (100,))
    eer, threshold = compute_eer(predictions, targets)
    assert isinstance(eer, float), "EER should be a float"
    assert isinstance(threshold, float), "Threshold should be a float"
    assert 0 <= eer <= 1, "EER should be between 0 and 1"
    assert 0 <= threshold <= 1, "Threshold should be between 0 and 1"