AutoResearch · younesStrittmatter · Nov 18, 2023 · Nov 5, 2023
diff --git a/.github/workflows/test-pre-commit-hooks.yml b/.github/workflows/test-pre-commit-hooks.yml
@@ -0,0 +1,25 @@
+# This workflow installs pre-commit on Python 3.8 and runs every configured pre-commit hook against all files
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: Test pre-commit-hooks
+
+on:
+  pull_request:
+  merge_group:
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v3
+    - name: Set up Python
+      uses: actions/setup-python@v4
+      with:
+        python-version: '3.8'
+        cache: 'pip'
+    - run: pip install pre-commit
+    - uses: actions/cache@v3
+      with:
+        path: ~/.cache/pre-commit
+        key: pre-commit-3|${{ env.pythonLocation }}|${{ runner.os }}|${{ hashFiles('.pre-commit-config.yaml') }}
+    - run: pre-commit run --all-files --show-diff-on-failure --color=always
diff --git a/src/autora/experimentalist/novelty/__init__.py b/src/autora/experimentalist/novelty/__init__.py
@@ -1,7 +1,7 @@
 """
 Novelty Experimentalist
 """
-from typing import Iterable, Literal, Optional, Union
+from typing import Literal, Optional, Union
 
 import numpy as np
 import pandas as pd
@@ -44,13 +44,15 @@ def sample(
     """
     This novelty experimentalist re-arranges the pool of experimental conditions according to their
     dissimilarity with respect to a reference pool. The default dissimilarity is calculated
-    as the average of the pairwise distances between the conditions in the pool and the reference conditions.
+    as the average of the pairwise distances between the conditions in the pool and the reference
+    conditions.
     If no number of samples are specified, all samples will be ordered and returned from the pool.
 
     Args:
         conditions: pool of experimental conditions to evaluate dissimilarity
         reference_conditions: reference pool of experimental conditions
-        num_samples: number of samples to select from the pool of experimental conditions (the default is to select all)
+        num_samples: number of samples to select from the pool of experimental conditions
+            (the default is to select all)
         metric (str): dissimilarity measure. Options: 'euclidean', 'manhattan', 'chebyshev',
             'minkowski', 'wminkowski', 'seuclidean', 'mahalanobis', 'haversine',
             'hamming', 'canberra', 'braycurtis', 'matching', 'jaccard', 'dice',
@@ -61,7 +63,9 @@ def sample(
         Sampled pool of conditions
     """
 
-    new_conditions = novelty_score_sample(conditions, reference_conditions, num_samples, metric, integration)
+    new_conditions = novelty_score_sample(
+        conditions, reference_conditions, num_samples, metric, integration
+    )
     new_conditions.drop("score", axis=1, inplace=True)
 
     return new_conditions
@@ -77,13 +81,15 @@ def score_sample(
     """
     This dissimilarity samples re-arranges the pool of experimental conditions according to their
     dissimilarity with respect to a reference pool. The default dissimilarity is calculated
-    as the average of the pairwise distances between the conditions in the pool and the reference conditions.
+    as the average of the pairwise distances between the conditions in the pool and the reference
+    conditions.
     If no number of samples are specified, all samples will be ordered and returned from the pool.
 
     Args:
-        condition_pool: pool of experimental conditions to evaluate dissimilarity
+        conditions: pool of experimental conditions to evaluate dissimilarity
         reference_conditions: reference pool of experimental conditions
-        num_samples: number of samples to select from the pool of experimental conditions (the default is to select all)
+        num_samples: number of samples to select from the pool of experimental conditions
+            (the default is to select all)
         metric (str): dissimilarity measure. Options: 'euclidean', 'manhattan', 'chebyshev',
             'minkowski', 'wminkowski', 'seuclidean', 'mahalanobis', 'haversine',
             'hamming', 'canberra', 'braycurtis', 'matching', 'jaccard', 'dice',
@@ -127,6 +133,7 @@ def score_sample(
     else:
         return conditions
 
+
 novelty_sample = sample
 novelty_sample.__doc__ = """Alias for sample"""
 novelty_score_sample = score_sample

diff --git a/tests/test_exp_novelty_sampler.py b/tests/test_exp_novelty_sampler.py
@@ -1,9 +1,11 @@
+import numpy as np
 import pandas as pd
+
 from autora.experimentalist.novelty import novelty_sample, novelty_score_sample
-import numpy as np
 
 # Note: We encourage you to write more functionality tests for your sampler.
 
+
 def test_output_dimensions():
     condition_pool = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
     reference_conditions = np.array([[0, 0, 0, 0], [1, 1, 1, 1]])
@@ -13,6 +15,7 @@ def test_output_dimensions():
     # Check that the sampler returns n experiment conditions
     assert condition_pool_new.shape[0] == n
 
+
 def test_output_dimensions_df():
     condition_pool = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
     reference_conditions = pd.DataFrame([[0, 0, 0, 0], [1, 1, 1, 1]])
@@ -32,9 +35,9 @@ def test_novelty_sample_1D():
     matrix2 = np.array([1, 2, 3])
 
     # reorder matrix1 according to its distances to matrix2
-    reordered_matrix1 = novelty_sample(conditions = matrix1,
-                                        reference_conditions = matrix2,
-                                        num_samples = num_samples)
+    reordered_matrix1 = novelty_sample(
+        conditions=matrix1, reference_conditions=matrix2, num_samples=num_samples
+    )
 
     assert reordered_matrix1.shape[0] == num_samples
     assert reordered_matrix1.shape[1] == 1
@@ -48,25 +51,26 @@ def test_novelty_sample_ND():
     num_samples = 2
 
     # reorder matrix1 according to its distances to matrix2
-    reordered_matrix1 = novelty_sample(conditions = matrix1,
-                                        reference_conditions = matrix2,
-                                        num_samples = num_samples)
+    reordered_matrix1 = novelty_sample(
+        conditions=matrix1, reference_conditions=matrix2, num_samples=num_samples
+    )
 
     assert reordered_matrix1.shape[0] == 2
     assert reordered_matrix1.shape[1] == 3
     assert np.array_equal(reordered_matrix1, np.array([[10, 11, 12], [7, 8, 9]]))
 
+
 def test_novelty_score_sample_ND():
     # define two matrices
     matrix1 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])
     matrix2 = np.array([[1, 1, 1], [2, 2, 2], [3, 3, 3]])
     num_samples = 3
 
     # reorder matrix1 according to its distances to matrix2, and obtain distance score
-    new_conditions = novelty_score_sample(conditions=matrix1,
-                                        reference_conditions=matrix2,
-                                        num_samples=num_samples)
+    new_conditions = novelty_score_sample(
+        conditions=matrix1, reference_conditions=matrix2, num_samples=num_samples
+    )
 
     score = np.array(new_conditions["score"])
 
-    assert score[0] > score[1]  and score[1] > score[2]
+    assert score[0] > score[1] and score[1] > score[2]