Fixing black formatter (#301)
eco3 authored Oct 11, 2022
1 parent 84f9bbc commit 3a064b4
Showing 16 changed files with 63 additions and 50 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pythonpackage.yml
@@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
-        python-version: [3.6, 3.7, 3.8]
+        python-version: ['3.7', '3.8', '3.9', '3.10']

steps:
- uses: actions/checkout@v2
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -16,7 +16,7 @@ repos:
- id: check-merge-conflict # Check for files that contain merge conflict strings.
- id: debug-statements # Check for debugger imports and py37+ `breakpoint()` calls in python source.
- repo: https://github.com/psf/black
-    rev: 19.10b0
+    rev: 22.8.0
hooks:
- id: black
- repo: local
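
For context: black 22.x tightened its power-operator style, dropping the spaces around ** whenever both operands are simple (bare names, literals, or attribute access). That single rule accounts for most of the source changes in this commit. A minimal sketch of the new style (illustrative only, not part of the diff):

    # black >= 22.1 hugs ** when both operands are simple
    var = scale**2     # bare names: spaces removed
    y = 50 * (x**deg)  # hugged even inside a larger expression
    z = (x + 1) ** 2   # compound operand: spaces kept
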
2 changes: 1 addition & 1 deletion Makefile
@@ -1,6 +1,6 @@

install:
-pip install poetry && poetry install
+pip install poetry==1.2.1 && poetry install

package:
poetry build
7 changes: 6 additions & 1 deletion examples/simulations/regression_sim.py
@@ -29,7 +29,12 @@
Y = (X @ beta1 + args.noise_lvl * noise).squeeze()
print(X.shape, Y.shape)

-X_train, X_test = X[:1000, :], X[1000:,]
+X_train, X_test = (
+    X[:1000, :],
+    X[
+        1000:,
+    ],
+)
Y_train, Y_test = Y[:1000], Y[1000:]

ngb = NGBoost(
2 changes: 1 addition & 1 deletion figures/toy.py
@@ -20,7 +20,7 @@ def gen_data(n=50, bound=1, deg=3, beta=1, noise=0.9, intcpt=-1):
x = np.linspace(-bound, bound, n)[:, np.newaxis]
h = np.linspace(-bound, bound, n)[:, np.newaxis]
e = np.random.randn(*x.shape) * (0.1 + 10 * np.abs(x))
-    y = 50 * (x ** deg) + h * beta + noise * e + intcpt
+    y = 50 * (x**deg) + h * beta + noise * e + intcpt
return x, y.squeeze(), np.c_[h, np.ones_like(h)]


2 changes: 1 addition & 1 deletion figures/toy_single.py
@@ -20,7 +20,7 @@ def gen_data(n=50, bound=1, deg=3, beta=1, noise=0.9, intcpt=-1):
x = np.linspace(-bound, bound, n)[:, np.newaxis]
h = np.linspace(-bound, bound, n)[:, np.newaxis]
e = np.random.randn(*x.shape) * (0.1 + 10 * np.abs(x))
-    y = 50 * (x ** deg) + h * beta + noise * e + intcpt
+    y = 50 * (x**deg) + h * beta + noise * e + intcpt
return x, y.squeeze(), np.c_[h, np.ones_like(h)]


2 changes: 1 addition & 1 deletion ngboost/distns/laplace.py
@@ -18,7 +18,7 @@ def d_score(self, Y):

def metric(self):
FI = np.zeros((self.loc.shape[0], 2, 2))
-        FI[:, 0, 0] = 1 / self.scale ** 2
+        FI[:, 0, 0] = 1 / self.scale**2
FI[:, 1, 1] = 1
return FI

8 changes: 4 additions & 4 deletions ngboost/distns/lognormal.py
@@ -22,8 +22,8 @@ def d_score(self, Y):
Z = (lT - self.loc) / self.scale

D_uncens = np.zeros((self.loc.shape[0], 2))
-        D_uncens[:, 0] = (self.loc - lT) / (self.scale ** 2)
-        D_uncens[:, 1] = 1 - ((self.loc - lT) ** 2) / (self.scale ** 2)
+        D_uncens[:, 0] = (self.loc - lT) / (self.scale**2)
+        D_uncens[:, 1] = 1 - ((self.loc - lT) ** 2) / (self.scale**2)

D_cens = np.zeros((self.loc.shape[0], 2))
D_cens[:, 0] = -sp.stats.norm.pdf(lT, loc=self.loc, scale=self.scale) / (
@@ -39,7 +39,7 @@ def d_score(self, Y):

def metric(self):
FI = np.zeros((self.loc.shape[0], 2, 2))
-        FI[:, 0, 0] = 1 / (self.scale ** 2) + self.eps
+        FI[:, 0, 0] = 1 / (self.scale**2) + self.eps
FI[:, 1, 1] = 2
return FI

@@ -83,7 +83,7 @@ def d_score(self, Y):
def metric(self):
I = np.zeros((self.loc.shape[0], 2, 2))
I[:, 0, 0] = 2
-        I[:, 1, 1] = self.scale ** 2
+        I[:, 1, 1] = self.scale**2
I /= 2 * np.sqrt(np.pi)
return I

2 changes: 1 addition & 1 deletion ngboost/distns/normal.py
@@ -68,7 +68,7 @@ def __init__(self, params):
super().__init__(params)
self.loc = params[0]
self.scale = np.exp(params[1])
-        self.var = self.scale ** 2
+        self.var = self.scale**2
self.dist = dist(loc=self.loc, scale=self.scale)

def fit(Y):
4 changes: 2 additions & 2 deletions ngboost/distns/t.py
@@ -60,7 +60,7 @@ def __init__(self, params):
super().__init__(params)
self.loc = params[0]
self.scale = np.exp(params[1])
-        self.var = self.scale ** 2
+        self.var = self.scale**2
self.df = np.exp(params[2])
self.dist = dist(loc=self.loc, scale=self.scale, df=self.df)

@@ -123,7 +123,7 @@ def __init__(self, params):
super().__init__(params)
self.loc = params[0]
self.scale = np.exp(params[1])
-        self.var = self.scale ** 2
+        self.var = self.scale**2
# fixed df
self.df = np.ones_like(self.loc) * self.fixed_df
self.dist = dist(loc=self.loc, scale=self.scale, df=self.df)
4 changes: 2 additions & 2 deletions ngboost/distns/utils.py
@@ -26,8 +26,8 @@ class SurvivalDistn(Dist):

def fit(Y):
"""
-        Parameters:
-        Y : a object with keys {time, event}, each containing an array
+    Parameters:
+    Y : a object with keys {time, event}, each containing an array
"""
return Dist.fit(Y["Time"])

16 changes: 8 additions & 8 deletions pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "ngboost"
version = "0.3.13dev"
version = "0.3.14dev"
description = "Library for probabilistic predictions via gradient boosting."
authors = ["Stanford ML Group <[email protected]>"]
readme = "README.md"
@@ -13,21 +13,21 @@ classifiers = [
license = "Apache License 2.0"

[tool.poetry.dependencies]
python = ">=3.6.2, <4.0"
scikit-learn = ">=0.21"
numpy = ">=1.17"
scipy = ">=1.3"
python = ">=3.7.1, <3.11"
scikit-learn = ">=1.0.2"
numpy = ">=1.21.2"
scipy = ">=1.7.2"
tqdm = ">=4.3"
lifelines = ">=0.25"

pandas = ">=1.3.5"
flake8 = "^5.0.4"

[tool.poetry.dev-dependencies]
pytest = "^6.1.2"
black = "^20.8b1"
black = "^22.8.0"
pre-commit = "^2.0"
isort = "^5.6.4"
pylint = "^2.6.0"
flake8 = "^3.8.4"

[build-system]
requires = ["poetry-core>=1.0.0"]
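
For context: the new floors roughly line up with the first releases of these libraries to ship Python 3.10 wheels, matching the widened python constraint. A quick sanity check that a local environment satisfies the new minimums, as a sketch: it assumes Python 3.8+ for importlib.metadata and that the packaging library is importable (it ships with modern pip and Poetry environments), with the floor values copied from the diff above:

    from importlib.metadata import version  # Python 3.8+

    from packaging.version import Version

    # minimum versions from the updated pyproject.toml
    floors = {
        "scikit-learn": "1.0.2",
        "numpy": "1.21.2",
        "scipy": "1.7.2",
        "pandas": "1.3.5",
    }

    for pkg, floor in floors.items():
        assert Version(version(pkg)) >= Version(floor), f"{pkg} older than {floor}"
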
20 changes: 12 additions & 8 deletions tests/conftest.py
@@ -2,7 +2,7 @@

import numpy as np
import pytest
-from sklearn.datasets import load_boston, load_breast_cancer
+from sklearn.datasets import fetch_california_housing, load_breast_cancer
from sklearn.model_selection import train_test_split

Tuple4Array = Tuple[np.array, np.array, np.array, np.array]
@@ -23,27 +23,31 @@ def pytest_configure(config):


@pytest.fixture(scope="session")
-def boston_data() -> Tuple4Array:
-    X, Y = load_boston(True)
+def california_housing_data() -> Tuple4Array:
+    X, Y = fetch_california_housing(return_X_y=True)
return train_test_split(X, Y, test_size=0.2, random_state=23)


@pytest.fixture(scope="session")
-def boston_survival_data() -> Tuple5Array:
-    X, Y = load_boston(True)
+def california_housing_survival_data() -> Tuple5Array:
+    X, Y = fetch_california_housing(return_X_y=True)
X_surv_train, X_surv_test, Y_surv_train, Y_surv_test = train_test_split(
X, Y, test_size=0.2, random_state=14
)

+    # calculate threshold for censoring data
+    censor_threshold = np.quantile(Y_surv_train, 0.75)
# introduce administrative censoring to simulate survival data
-    T_surv_train = np.minimum(Y_surv_train, 30)  # time of an event or censoring
+    T_surv_train = np.minimum(
+        Y_surv_train, censor_threshold
+    )  # time of an event or censoring
E_surv_train = (
-        Y_surv_train > 30
+        Y_surv_train > censor_threshold
) # 1 if T[i] is the time of an event, 0 if it's a time of censoring
return X_surv_train, X_surv_test, T_surv_train, E_surv_train, Y_surv_test


@pytest.fixture(scope="session")
def breast_cancer_data() -> Tuple4Array:
-    X, Y = load_breast_cancer(True)
+    X, Y = load_breast_cancer(return_X_y=True)
return train_test_split(X, Y, test_size=0.2, random_state=12)
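
For context: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2, so the fixtures migrate to fetch_california_housing, which returns the same (X, y) regression shape but downloads and caches the dataset on first use. A standalone sketch of the new fixture logic, including the quantile-based administrative censoring (variable names here are illustrative):

    import numpy as np
    from sklearn.datasets import fetch_california_housing
    from sklearn.model_selection import train_test_split

    X, Y = fetch_california_housing(return_X_y=True)  # fetched and cached on first call
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.2, random_state=23
    )

    # administrative censoring: cap observed times at the 75th percentile
    censor_threshold = np.quantile(Y_train, 0.75)
    T_train = np.minimum(Y_train, censor_threshold)  # time of an event or censoring
    E_train = Y_train > censor_threshold  # event flag, mirroring the fixture above
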
4 changes: 2 additions & 2 deletions tests/test_basic.py
@@ -34,12 +34,12 @@ def test_classification(breast_cancer_data):


# TODO: This is non-deterministic in the model fitting
-def test_regression(boston_data):
+def test_regression(california_housing_data):
from sklearn.metrics import ( # pylint: disable=import-outside-toplevel
mean_squared_error,
)

-    x_train, x_test, y_train, y_test = boston_data
+    x_train, x_test, y_train, y_test = california_housing_data
ngb = NGBRegressor(verbose=False)
ngb.fit(x_train, y_train)
preds = ngb.predict(x_test)
22 changes: 12 additions & 10 deletions tests/test_distns.py
@@ -40,10 +40,8 @@
DecisionTreeRegressor(criterion="friedman_mse", max_depth=5),
],
)
-def test_dists_runs_on_examples_logscore(
-    dist: Distn, learner, boston_data: Tuple4Array
-):
-    X_train, X_test, y_train, y_test = boston_data
+def test_dists_runs_on_examples_logscore(dist: Distn, learner, california_housing_data):
+    X_train, X_test, y_train, y_test = california_housing_data
# TODO: test early stopping features
ngb = NGBRegressor(Dist=dist, Score=LogScore, Base=learner, verbose=False)
ngb.fit(X_train, y_train)
@@ -61,10 +61,8 @@ def test_dists_runs_on_examples_logscore(
DecisionTreeRegressor(criterion="friedman_mse", max_depth=5),
],
)
-def test_dists_runs_on_examples_crpscore(
-    dist: Distn, learner, boston_data: Tuple4Array
-):
-    X_train, X_test, y_train, y_test = boston_data
+def test_dists_runs_on_examples_crpscore(dist: Distn, learner, california_housing_data):
+    X_train, X_test, y_train, y_test = california_housing_data
# TODO: test early stopping features
ngb = NGBRegressor(Dist=dist, Score=CRPScore, Base=learner, verbose=False)
ngb.fit(X_train, y_train)
@@ -83,9 +79,15 @@
],
)
def test_survival_runs_on_examples(
-    dist: Distn, score: Score, learner, boston_survival_data: Tuple5Array
+    dist: Distn, score: Score, learner, california_housing_survival_data
):
-    X_train, X_test, T_surv_train, E_surv_train, Y_surv_test = boston_survival_data
+    (
+        X_train,
+        X_test,
+        T_surv_train,
+        E_surv_train,
+        Y_surv_test,
+    ) = california_housing_survival_data
# test early stopping features
ngb = NGBSurvival(Dist=dist, Score=score, Base=learner, verbose=False)
ngb.fit(X_train, T_surv_train, E_surv_train)
14 changes: 8 additions & 6 deletions tests/test_pickling.py
@@ -9,7 +9,9 @@

# name = learners_data to avoid pylint redefined-outer-name
@pytest.fixture(name="learners_data")
-def fixture_learners_data(breast_cancer_data, boston_data, boston_survival_data):
+def fixture_learners_data(
+    breast_cancer_data, california_housing_data, california_housing_survival_data
+):
"""
Returns:
A list of iterables,
@@ -23,26 +25,26 @@ def fixture_learners_data(breast_cancer_data, boston_data, boston_survival_data)
ngb.fit(X_class_train, Y_class_train)
models_data.append((ngb, X_class_train, ngb.predict(X_class_train)))

-    X_reg_train, _, Y_reg_train, _ = boston_data
+    X_reg_train, _, Y_reg_train, _ = california_housing_data
ngb = NGBRegressor(verbose=False, n_estimators=10)
ngb.fit(X_reg_train, Y_reg_train)
models_data.append((ngb, X_reg_train, ngb.predict(X_reg_train)))

-    X_surv_train, _, T_surv_train, E_surv_train, _ = boston_survival_data
+    X_surv_train, _, T_surv_train, E_surv_train, _ = california_housing_survival_data
ngb = NGBSurvival(verbose=False, n_estimators=10)
ngb.fit(X_surv_train, T_surv_train, E_surv_train)
models_data.append((ngb, X_surv_train, ngb.predict(X_surv_train)))

ngb = NGBRegressor(Dist=MultivariateNormal(2), n_estimators=10)
-    ngb.fit(X_surv_train, np.vstack([T_surv_train, E_surv_train]).T)
+    ngb.fit(X_surv_train, np.vstack((T_surv_train, E_surv_train)).T)
models_data.append((ngb, X_surv_train, ngb.predict(X_surv_train)))
return models_data


def test_model_save(learners_data):
"""
-        Tests that the model can be loaded and predict still works
-        It checks that the new predictions are the same as pre-pickling
+    Tests that the model can be loaded and predict still works
+    It checks that the new predictions are the same as pre-pickling
"""
for learner, data, preds in learners_data:
serial = pickle.dumps(learner)
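
The pickling test boils down to a serialize-reload-compare round trip. A minimal standalone version of that check (the synthetic dataset and model sizes are illustrative):

    import pickle

    import numpy as np
    from ngboost import NGBRegressor

    rng = np.random.default_rng(0)
    X = rng.normal(size=(100, 5))
    y = X @ np.ones(5) + 0.1 * rng.normal(size=100)

    ngb = NGBRegressor(verbose=False, n_estimators=10)
    ngb.fit(X, y)
    preds_before = ngb.predict(X)

    restored = pickle.loads(pickle.dumps(ngb))  # round trip through pickle
    np.testing.assert_array_equal(preds_before, restored.predict(X))
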
