diff --git a/examples/ag_news.py b/examples/ag_news.py index 2f60ad99..a308db66 100644 --- a/examples/ag_news.py +++ b/examples/ag_news.py @@ -1,6 +1,5 @@ import numpy as np from sklearn.metrics import classification_report -from sklearn.utils import shuffle from torchtext.datasets import AG_NEWS from npc_gzip.compressors.base import BaseCompressor diff --git a/examples/imdb.py b/examples/imdb.py index 44be9f1b..49848bc1 100644 --- a/examples/imdb.py +++ b/examples/imdb.py @@ -1,6 +1,5 @@ import numpy as np from sklearn.metrics import classification_report -from sklearn.utils import shuffle from torchtext.datasets import IMDB from npc_gzip.compressors.base import BaseCompressor diff --git a/npc_gzip/aggregations.py b/npc_gzip/aggregations.py index adedfdf5..1f2799db 100644 --- a/npc_gzip/aggregations.py +++ b/npc_gzip/aggregations.py @@ -1,7 +1,5 @@ import itertools -import numpy as np - def concatenate_with_space(stringa: str, stringb: str) -> str: """ diff --git a/npc_gzip/distance.py b/npc_gzip/distance.py index f999b1d6..8350be19 100644 --- a/npc_gzip/distance.py +++ b/npc_gzip/distance.py @@ -1,4 +1,4 @@ -from typing import Sequence, Union +from typing import Sequence import numpy as np diff --git a/npc_gzip/knn_classifier.py b/npc_gzip/knn_classifier.py index efbb0791..8cffeced 100644 --- a/npc_gzip/knn_classifier.py +++ b/npc_gzip/knn_classifier.py @@ -1,4 +1,4 @@ -from typing import Optional, Sequence, Union +from typing import Optional, Sequence import numpy as np from tqdm import tqdm @@ -86,10 +86,10 @@ def __init__( assert ( self.training_inputs.shape == self.training_labels.shape ), f""" - Training Inputs and Labels did not maintain their + Training Inputs and Labels did not maintain their shape during the conversion from lists to numpy arrays. This is most likely a bug in the numpy package: - + self.training_inputs.shape: {self.training_inputs.shape} self.training_labels.shape: {self.training_labels.shape} """ @@ -309,9 +309,9 @@ def predict( assert ( top_k <= x.shape[0] ), f""" - top_k ({top_k}) must be less or equal to than the number of + top_k ({top_k}) must be less or equal to than the number of samples provided to be predicted on ({x.shape[0]}) - + """ # sample training inputs and labels diff --git a/npc_gzip/utils.py b/npc_gzip/utils.py index f30c5b9e..63da94ab 100644 --- a/npc_gzip/utils.py +++ b/npc_gzip/utils.py @@ -10,14 +10,12 @@ import random import string -from npc_gzip.exceptions import InvalidObjectTypeException - def generate_sentence(number_of_words: int = 10) -> str: """ Generates a sentence of random - numbers and letters, with - `number_of_words` words in the + numbers and letters, with + `number_of_words` words in the sentence such that len(out.split()) \ == `number_of_words`. diff --git a/original_codebase/data.py b/original_codebase/data.py index 2ff3b0a4..e5ab6e9a 100644 --- a/original_codebase/data.py +++ b/original_codebase/data.py @@ -1,16 +1,14 @@ import csv import os import random -from collections import OrderedDict, defaultdict +from collections import defaultdict from collections.abc import Iterable from typing import Optional, Sequence, Union import numpy as np -import torch import unidecode from datasets import load_dataset from sklearn.datasets import fetch_20newsgroups -from torch.utils.data import DataLoader, Subset def _load_csv_filepath(csv_filepath: str) -> list: diff --git a/original_codebase/experiments.py b/original_codebase/experiments.py index 2e42f7af..ea74b41c 100644 --- a/original_codebase/experiments.py +++ b/original_codebase/experiments.py @@ -1,20 +1,11 @@ # Experiment framework import operator -import os -import pickle import random -import statistics -from collections import Counter, defaultdict -from copy import deepcopy -from functools import partial -from itertools import repeat -from statistics import mode +from collections import defaultdict from typing import Any, Callable, Optional import numpy as np -import torch from compressors import DefaultCompressor -from sklearn.metrics.cluster import adjusted_rand_score, normalized_mutual_info_score from tqdm import tqdm diff --git a/tests/test_aggregations.py b/tests/test_aggregations.py index 714268ed..61b3cab1 100644 --- a/tests/test_aggregations.py +++ b/tests/test_aggregations.py @@ -1,7 +1,4 @@ -import pytest - from npc_gzip.aggregations import aggregate_strings, concatenate_with_space -from npc_gzip.exceptions import StringTooShortException class TestAggregations: diff --git a/tests/test_base_compressor.py b/tests/test_base_compressor.py index c99a374d..27e4b5b6 100644 --- a/tests/test_base_compressor.py +++ b/tests/test_base_compressor.py @@ -1,5 +1,4 @@ import gzip -from types import ModuleType import pytest diff --git a/tests/test_bz2_compressor.py b/tests/test_bz2_compressor.py index ea1ca097..b506916c 100644 --- a/tests/test_bz2_compressor.py +++ b/tests/test_bz2_compressor.py @@ -1,11 +1,7 @@ import bz2 -from types import ModuleType - -import pytest from npc_gzip.compressors.base import BaseCompressor from npc_gzip.compressors.bz2_compressor import Bz2Compressor -from npc_gzip.exceptions import InvalidCompressorException class TestBz2Compressor: diff --git a/tests/test_gzip_compressor.py b/tests/test_gzip_compressor.py index d7ca247d..a808ca80 100644 --- a/tests/test_gzip_compressor.py +++ b/tests/test_gzip_compressor.py @@ -1,11 +1,7 @@ import gzip -from types import ModuleType - -import pytest from npc_gzip.compressors.base import BaseCompressor from npc_gzip.compressors.gzip_compressor import GZipCompressor -from npc_gzip.exceptions import InvalidCompressorException class TestBz2Compressor: diff --git a/tests/test_knn_classifier.py b/tests/test_knn_classifier.py index 55f40cd7..8d4420e3 100644 --- a/tests/test_knn_classifier.py +++ b/tests/test_knn_classifier.py @@ -7,10 +7,8 @@ from npc_gzip.compressors.bz2_compressor import Bz2Compressor from npc_gzip.compressors.gzip_compressor import GZipCompressor from npc_gzip.compressors.lzma_compressor import LzmaCompressor -from npc_gzip.distance import Distance from npc_gzip.exceptions import ( InputLabelEqualLengthException, - InvalidObjectTypeException, UnsupportedDistanceMetricException, ) from npc_gzip.knn_classifier import KnnClassifier diff --git a/tests/test_lzma_compressor.py b/tests/test_lzma_compressor.py index 56c40664..0628416b 100644 --- a/tests/test_lzma_compressor.py +++ b/tests/test_lzma_compressor.py @@ -1,11 +1,7 @@ import lzma -from types import ModuleType - -import pytest from npc_gzip.compressors.base import BaseCompressor from npc_gzip.compressors.lzma_compressor import LzmaCompressor -from npc_gzip.exceptions import InvalidCompressorException class TestBz2Compressor: diff --git a/tests/test_utils.py b/tests/test_utils.py index 341762c2..868ba1a8 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -2,7 +2,6 @@ import pytest -from npc_gzip.exceptions import InvalidObjectTypeException from npc_gzip.utils import generate_dataset, generate_sentence