Add tests for datasets and coverage report (#130)

Didanny · github-actions[bot] · mikeheddes · web-flow · commit c604323aa101 · 2023-03-06T21:27:40.000-08:00
* Update readme

* Add test for uci benchmark metadata

* Add test for all dataset downloads

* [github-action] formatting fixes

* Add test for report and score

* [github-action] formatting fixes

* Remove main

* [github-action] formatting fixes

* Remove parenthesis

* Remove try block

* Exclude UCIHAR

* Add escape characters

* Add quotes

* Exclude UCIHAR

* Exclude CCPP

* Exclude collection

* [github-action] formatting fixes

* Debug dataset downloads

* Debug dataset downloads

* Debug dataset downloads

* [github-action] formatting fixes

* Debug dataset downloads

* [github-action] formatting fixes

* Debug dataset downloads

* Debug dataset downloads

* [github-action] formatting fixes

* Add openpyxl dependency

* Fix error in UCIHAR dataset

* [github-action] formatting fixes

* Add coverage to the dev requirements

* Update readme

* Fix command in readme

* Split dataset download test

* Add tests for base class

* [github-action] formatting fixes

* Add test for resonator

* [github-action] formatting fixes

* Delete data dir only once

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Mike Heddes <mikeheddes@gmail.com>
diff --git a/README.md b/README.md
@@ -118,6 +118,14 @@ To create a clean build, remove the `/build` and `/docs/generated` directories.
 2. Create a new GitHub release. Set the tag according to [PEP 440](https://peps.python.org/pep-0440/), e.g., v1.5.2, and provide a clear description of the changes. You can use GitHub's "auto-generate release notes" button. Look at previous releases for examples.
 3. A GitHub release triggers a GitHub action that builds the library and publishes it to PyPi and Conda in addition to the documentation website.
 
+### Running tests
+
+To run the unit tests located in [`torchhd/tests`](https://github.com/hyperdimensional-computing/torchhd/tree/main/torchhd/tests), do the following:
+1. Use `pip install -r dev-requirements.txt` to install the required development packages.
+2. Run the tests with `pytest`.
+
+Optionally, to measure code coverage, run `coverage run -m --omit="torchhd/tests/**" pytest` to collect the coverage data. You can then view the resulting report with `coverage report`.
+
 ### License
 
 This library is [MIT licensed](https://github.com/hyperdimensional-computing/torchhd/blob/main/LICENSE).
diff --git a/conda/meta.yaml b/conda/meta.yaml
@@ -23,6 +23,7 @@ requirements:
     - scipy
     - pandas
     - requests
+    - openpyxl
     - tqdm
 
 test:
diff --git a/dev-requirements.txt b/dev-requirements.txt
@@ -7,4 +7,6 @@ numpy
 flake8
 pytest
 black
-tqdm
+tqdm
+openpyxl
+coverage
diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -5,5 +5,6 @@ scipy
 requests
 tqdm
 numpy
+openpyxl
 sphinx
 sphinx-rtd-theme
diff --git a/setup.py b/setup.py
@@ -25,6 +25,7 @@
         "numpy",
         "requests",
         "tqdm",
+        "openpyxl",
     ],
     packages=find_packages(exclude=["docs", "torchhd.tests", "examples"]),
     python_requires=">=3.6, <4",
diff --git a/torchhd/datasets/beijing_air_quality.py b/torchhd/datasets/beijing_air_quality.py
@@ -177,14 +177,14 @@ def _load_data(self):
         # Map the wind directions to a category identifier (int)
         # # Save the mapping for user referencing
         self.wind_directions = tuple(sorted(list(set(data.wind_direction))))
-        data.loc[:, "wind_direction"] = data.wind_direction.apply(
+        data.wind_direction = data.wind_direction.apply(
             lambda x: self.wind_directions.index(x)
         )
 
         # Map the stations to a category identifier (int)
         # Save the mapping for user referencing
         self.stations = tuple(sorted(list(set(data.station))))
-        data.loc[:, "station"] = data.station.apply(lambda x: self.stations.index(x))
+        data.station = data.station.apply(lambda x: self.stations.index(x))
 
         categorical = data[self.categorical_columns]
         self.categorical_data = torch.tensor(categorical.values, dtype=torch.long)
diff --git a/torchhd/datasets/ucihar.py b/torchhd/datasets/ucihar.py
@@ -23,6 +23,7 @@
 #
 import os
 import os.path as path
+import shutil
 from typing import Callable, Optional, List
 import torch
 from torch.utils import data
@@ -132,19 +133,19 @@ def _check_integrity(self) -> bool:
         test_dir = os.path.join(self.root, "test")
         has_test_dir = os.path.isdir(test_dir)
 
-        if not has_train_dir and not has_test_dir:
+        if (not has_train_dir) or (not has_test_dir):
             return False
 
         has_train_x = os.path.isfile(os.path.join(train_dir, "X_train.txt"))
         has_train_y = os.path.isfile(os.path.join(train_dir, "y_train.txt"))
 
-        if not has_train_x and not has_train_y:
+        if (not has_train_x) or (not has_train_y):
             return False
 
         has_test_x = os.path.isfile(os.path.join(test_dir, "X_test.txt"))
-        has_test_y = os.path.isfile(os.path.join(train_dir, "y_test.txt"))
+        has_test_y = os.path.isfile(os.path.join(test_dir, "y_test.txt"))
 
-        if not has_test_x or not has_test_y:
+        if (not has_test_x) or (not has_test_y):
             return False
 
         return True
@@ -154,15 +155,11 @@ def _load_data(self):
         data_file = "X_train.txt" if self.train else "X_test.txt"
         target_file = "y_train.txt" if self.train else "y_test.txt"
 
-        data = pd.read_csv(
-            os.path.join(data_dir, data_file), delim_whitespace=True, header=None
-        )
-        targets = np.loadtxt(
-            path.join(data_dir, target_file), delimiter="\n", dtype="int64"
-        ).tolist()
+        data = np.loadtxt(os.path.join(data_dir, data_file), dtype="float32")
+        targets = np.loadtxt(path.join(data_dir, target_file), dtype="int64")
 
-        self.data = torch.tensor(data.values, dtype=torch.float)
-        self.targets = torch.tensor(targets, dtype=torch.long) - 1
+        self.data = torch.from_numpy(data)
+        self.targets = torch.from_numpy(targets) - 1
 
     def download(self):
         """Downloads the dataset if it doesn't exist already"""
@@ -183,8 +180,8 @@ def download(self):
         source_dir = os.path.join(self.root, "UCI HAR Dataset")
         data_files = os.listdir(source_dir)
         for filename in data_files:
-            os.rename(
-                os.path.join(source_dir, filename), os.path.join(self.root, filename)
-            )
+            src = os.path.join(source_dir, filename)
+            dest = os.path.join(self.root, filename)
+            os.rename(src, dest)
 
         os.rmdir(source_dir)
diff --git a/torchhd/embeddings.py b/torchhd/embeddings.py
@@ -956,6 +956,10 @@ def __init__(
             out_features + 1, out_features, vsa, low=low, high=high, **factory_kwargs
         )
 
+    def reset_parameters(self) -> None:
+        self.key.reset_parameters()
+        self.density_encoding.reset_parameters()
+
     # Specify the steps needed to perform the encoding
     def forward(self, input: Tensor) -> Tensor:
         # Perform binding of key and value vectors
diff --git a/torchhd/tests/basis_hv/test_base_tensor.py b/torchhd/tests/basis_hv/test_base_tensor.py
@@ -0,0 +1,99 @@
+#
+# MIT License
+#
+# Copyright (c) 2023 Mike Heddes, Igor Nunes, Pere Vergés, Denis Kleyko, and Danny Abraham
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+import pytest
+import torch
+
+from torchhd import VSATensor
+
+
+class TestVSATensor:
+    def test_empty(self):
+        with pytest.raises(NotImplementedError):
+            VSATensor.empty(4, 525)
+
+    def test_identity(self):
+        with pytest.raises(NotImplementedError):
+            VSATensor.identity(4, 525)
+
+    def test_random(self):
+        with pytest.raises(NotImplementedError):
+            VSATensor.random(4, 525)
+
+    def test_bundle(self):
+        a = torch.randn(100).as_subclass(VSATensor)
+        b = torch.randn(100).as_subclass(VSATensor)
+
+        with pytest.raises(NotImplementedError):
+            a.bundle(b)
+
+    def test_multibundle(self):
+        a = torch.randn(10, 100).as_subclass(VSATensor)
+
+        with pytest.raises(NotImplementedError):
+            a.multibundle()
+
+    def test_bind(self):
+        a = torch.randn(100).as_subclass(VSATensor)
+        b = torch.randn(100).as_subclass(VSATensor)
+
+        with pytest.raises(NotImplementedError):
+            a.bind(b)
+
+    def test_multibind(self):
+        a = torch.randn(10, 100).as_subclass(VSATensor)
+
+        with pytest.raises(NotImplementedError):
+            a.multibind()
+
+    def test_inverse(self):
+        a = torch.randn(100).as_subclass(VSATensor)
+
+        with pytest.raises(NotImplementedError):
+            a.inverse()
+
+    def test_negative(self):
+        a = torch.randn(100).as_subclass(VSATensor)
+
+        with pytest.raises(NotImplementedError):
+            a.negative()
+
+    def test_permute(self):
+        a = torch.randn(100).as_subclass(VSATensor)
+
+        with pytest.raises(NotImplementedError):
+            a.permute()
+
+    def test_dot_similarity(self):
+        a = torch.randn(100).as_subclass(VSATensor)
+        b = torch.randn(100).as_subclass(VSATensor)
+
+        with pytest.raises(NotImplementedError):
+            a.dot_similarity(b)
+
+    def test_cosine_similarity(self):
+        a = torch.randn(100).as_subclass(VSATensor)
+        b = torch.randn(100).as_subclass(VSATensor)
+
+        with pytest.raises(NotImplementedError):
+            a.cosine_similarity(b)
diff --git a/torchhd/tests/test_datasets.py b/torchhd/tests/test_datasets.py
diff --git a/torchhd/tests/test_embeddings.py b/torchhd/tests/test_embeddings.py
diff --git a/torchhd/tests/test_operations.py b/torchhd/tests/test_operations.py