Skip to content

Commit

Permalink
Add tests for datasets and coverage report (#130)
Browse files Browse the repository at this point in the history
* Update readme

* Add test for uci benchmark metadata

* Add test for all dataset downloads

* [github-action] formatting fixes

* Add test for report and score

* [github-action] formatting fixes

* Remove main

* [github-action] formatting fixes

* Remove parenthesis

* Remove try block

* Exclude UCIHAR

* Add escape characters

* Add quotes

* Exclude UCIHAR

* Exclude CCPP

* Exclude collection

* [github-action] formatting fixes

* Debug dataset downloads

* Debug dataset downloads

* Debug dataset downloads

* [github-action] formatting fixes

* Debug dataset downloads

* [github-action] formatting fixes

* Debug dataset downloads

* Debug dataset downloads

* [github-action] formatting fixes

* Add openpyxl dependency

* Fix error in UCIHAR dataset

* [github-action] formatting fixes

* Add coverage to the dev requirements

* Update readme

* Fix command in readme

* Split dataset download test

* Add tests for base class

* [github-action] formatting fixes

* Add test for resonator

* [github-action] formatting fixes

* Delete data dir only once

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Mike Heddes <[email protected]>
  • Loading branch information
3 people authored Mar 7, 2023
1 parent d5c9c5f commit c604323
Show file tree
Hide file tree
Showing 12 changed files with 429 additions and 39 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,14 @@ To create a clean build, remove the `/build` and `/docs/generated` directories.
2. Create a new GitHub release. Set the tag according to [PEP 440](https://peps.python.org/pep-0440/), e.g., v1.5.2, and provide a clear description of the changes. You can use GitHub's "auto-generate release notes" button. Look at previous releases for examples.
3. A GitHub release triggers a GitHub action that builds the library and publishes it to PyPi and Conda in addition to the documentation website.

### Running tests

To run the unit tests located in [`torchhd/tests`](https://github.com/hyperdimensional-computing/torchhd/tree/main/torchhd/tests) do the following:
1. Use `pip install -r dev-requirements.txt` to install the required development packages.
2. Then run the tests using just `pytest`.

Optionally, to measure the code coverage, use `coverage run -m --omit="torchhd/tests/**" pytest` to record the coverage data. You can then view the resulting report with `coverage report`.

### License

This library is [MIT licensed](https://github.com/hyperdimensional-computing/torchhd/blob/main/LICENSE).
Expand Down
1 change: 1 addition & 0 deletions conda/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ requirements:
- scipy
- pandas
- requests
- openpyxl
- tqdm

test:
Expand Down
4 changes: 3 additions & 1 deletion dev-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,6 @@ numpy
flake8
pytest
black
tqdm
tqdm
openpyxl
coverage
1 change: 1 addition & 0 deletions docs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@ scipy
requests
tqdm
numpy
openpyxl
sphinx
sphinx-rtd-theme
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
"numpy",
"requests",
"tqdm",
"openpyxl",
],
packages=find_packages(exclude=["docs", "torchhd.tests", "examples"]),
python_requires=">=3.6, <4",
Expand Down
4 changes: 2 additions & 2 deletions torchhd/datasets/beijing_air_quality.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,14 +177,14 @@ def _load_data(self):
# Map the wind directions to a category identifier (int)
# # Save the mapping for user referencing
self.wind_directions = tuple(sorted(list(set(data.wind_direction))))
data.loc[:, "wind_direction"] = data.wind_direction.apply(
data.wind_direction = data.wind_direction.apply(
lambda x: self.wind_directions.index(x)
)

# Map the stations to a category identifier (int)
# Save the mapping for user referencing
self.stations = tuple(sorted(list(set(data.station))))
data.loc[:, "station"] = data.station.apply(lambda x: self.stations.index(x))
data.station = data.station.apply(lambda x: self.stations.index(x))

categorical = data[self.categorical_columns]
self.categorical_data = torch.tensor(categorical.values, dtype=torch.long)
Expand Down
27 changes: 12 additions & 15 deletions torchhd/datasets/ucihar.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#
import os
import os.path as path
import shutil
from typing import Callable, Optional, List
import torch
from torch.utils import data
Expand Down Expand Up @@ -132,19 +133,19 @@ def _check_integrity(self) -> bool:
test_dir = os.path.join(self.root, "test")
has_test_dir = os.path.isdir(test_dir)

if not has_train_dir and not has_test_dir:
if (not has_train_dir) or (not has_test_dir):
return False

has_train_x = os.path.isfile(os.path.join(train_dir, "X_train.txt"))
has_train_y = os.path.isfile(os.path.join(train_dir, "y_train.txt"))

if not has_train_x and not has_train_y:
if (not has_train_x) or (not has_train_y):
return False

has_test_x = os.path.isfile(os.path.join(test_dir, "X_test.txt"))
has_test_y = os.path.isfile(os.path.join(train_dir, "y_test.txt"))
has_test_y = os.path.isfile(os.path.join(test_dir, "y_test.txt"))

if not has_test_x or not has_test_y:
if (not has_test_x) or (not has_test_y):
return False

return True
Expand All @@ -154,15 +155,11 @@ def _load_data(self):
data_file = "X_train.txt" if self.train else "X_test.txt"
target_file = "y_train.txt" if self.train else "y_test.txt"

data = pd.read_csv(
os.path.join(data_dir, data_file), delim_whitespace=True, header=None
)
targets = np.loadtxt(
path.join(data_dir, target_file), delimiter="\n", dtype="int64"
).tolist()
data = np.loadtxt(os.path.join(data_dir, data_file), dtype="float32")
targets = np.loadtxt(path.join(data_dir, target_file), dtype="int64")

self.data = torch.tensor(data.values, dtype=torch.float)
self.targets = torch.tensor(targets, dtype=torch.long) - 1
self.data = torch.from_numpy(data)
self.targets = torch.from_numpy(targets) - 1

def download(self):
"""Downloads the dataset if it doesn't exist already"""
Expand All @@ -183,8 +180,8 @@ def download(self):
source_dir = os.path.join(self.root, "UCI HAR Dataset")
data_files = os.listdir(source_dir)
for filename in data_files:
os.rename(
os.path.join(source_dir, filename), os.path.join(self.root, filename)
)
src = os.path.join(source_dir, filename)
dest = os.path.join(self.root, filename)
os.rename(src, dest)

os.rmdir(source_dir)
4 changes: 4 additions & 0 deletions torchhd/embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -956,6 +956,10 @@ def __init__(
out_features + 1, out_features, vsa, low=low, high=high, **factory_kwargs
)

def reset_parameters(self) -> None:
    """Reset this object's state by delegating to its components.

    Calls ``reset_parameters()`` on ``self.key`` first and then on
    ``self.density_encoding``; nothing is returned.
    """
    # Attributes are looked up one at a time so that each component is
    # fetched and reset before the next one is touched.
    for component_name in ("key", "density_encoding"):
        getattr(self, component_name).reset_parameters()

# Specify the steps needed to perform the encoding
def forward(self, input: Tensor) -> Tensor:
# Perform binding of key and value vectors
Expand Down
99 changes: 99 additions & 0 deletions torchhd/tests/basis_hv/test_base_tensor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#
# MIT License
#
# Copyright (c) 2023 Mike Heddes, Igor Nunes, Pere Vergés, Denis Kleyko, and Danny Abraham
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
import pytest
import torch

from torchhd import VSATensor


class TestVSATensor:
    """Exercise each VSA operation on the bare ``VSATensor`` class.

    Every test calls exactly one operation and passes only when that call
    raises ``NotImplementedError``.
    """

    @staticmethod
    def _hypervector(*size):
        """Return ``torch.randn(*size)`` viewed as a ``VSATensor``."""
        return torch.randn(*size).as_subclass(VSATensor)

    # ------------------------------------------------------------------
    # Operations invoked on the class itself
    # ------------------------------------------------------------------

    def test_empty(self):
        """``VSATensor.empty`` must raise ``NotImplementedError``."""
        with pytest.raises(NotImplementedError):
            VSATensor.empty(4, 525)

    def test_identity(self):
        """``VSATensor.identity`` must raise ``NotImplementedError``."""
        with pytest.raises(NotImplementedError):
            VSATensor.identity(4, 525)

    def test_random(self):
        """``VSATensor.random`` must raise ``NotImplementedError``."""
        with pytest.raises(NotImplementedError):
            VSATensor.random(4, 525)

    # ------------------------------------------------------------------
    # Operations invoked on tensor instances
    # ------------------------------------------------------------------

    def test_bundle(self):
        """``bundle`` with a second tensor must raise ``NotImplementedError``."""
        lhs = self._hypervector(100)
        rhs = self._hypervector(100)

        with pytest.raises(NotImplementedError):
            lhs.bundle(rhs)

    def test_multibundle(self):
        """``multibundle`` on a 10 x 100 tensor must raise ``NotImplementedError``."""
        stack = self._hypervector(10, 100)

        with pytest.raises(NotImplementedError):
            stack.multibundle()

    def test_bind(self):
        """``bind`` with a second tensor must raise ``NotImplementedError``."""
        lhs = self._hypervector(100)
        rhs = self._hypervector(100)

        with pytest.raises(NotImplementedError):
            lhs.bind(rhs)

    def test_multibind(self):
        """``multibind`` on a 10 x 100 tensor must raise ``NotImplementedError``."""
        stack = self._hypervector(10, 100)

        with pytest.raises(NotImplementedError):
            stack.multibind()

    def test_inverse(self):
        """``inverse`` must raise ``NotImplementedError``."""
        hv = self._hypervector(100)

        with pytest.raises(NotImplementedError):
            hv.inverse()

    def test_negative(self):
        """``negative`` must raise ``NotImplementedError``."""
        hv = self._hypervector(100)

        with pytest.raises(NotImplementedError):
            hv.negative()

    def test_permute(self):
        """``permute`` called without arguments must raise ``NotImplementedError``."""
        hv = self._hypervector(100)

        with pytest.raises(NotImplementedError):
            hv.permute()

    def test_dot_similarity(self):
        """``dot_similarity`` with a second tensor must raise ``NotImplementedError``."""
        lhs = self._hypervector(100)
        rhs = self._hypervector(100)

        with pytest.raises(NotImplementedError):
            lhs.dot_similarity(rhs)

    def test_cosine_similarity(self):
        """``cosine_similarity`` with a second tensor must raise ``NotImplementedError``."""
        lhs = self._hypervector(100)
        rhs = self._hypervector(100)

        with pytest.raises(NotImplementedError):
            lhs.cosine_similarity(rhs)
Loading

0 comments on commit c604323

Please sign in to comment.