Fixing black formatter (#301)
eco3 authored Oct 11, 2022
1 parent 84f9bbc commit 3a064b4
Showing 16 changed files with 63 additions and 50 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pythonpackage.yml
@@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
-        python-version: [3.6, 3.7, 3.8]
+        python-version: ['3.7', '3.8', '3.9', '3.10']

steps:
- uses: actions/checkout@v2
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -16,7 +16,7 @@ repos:
- id: check-merge-conflict # Check for files that contain merge conflict strings.
- id: debug-statements # Check for debugger imports and py37+ `breakpoint()` calls in python source.
- repo: https://github.com/psf/black
-    rev: 19.10b0
+    rev: 22.8.0
hooks:
- id: black
- repo: local
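
For context: black 22.x tightened its power-operator style, dropping the spaces around ** whenever both operands are simple (bare names, literals, or attribute access). That single rule accounts for most of the source changes in this commit. A minimal sketch of the new style (illustrative only, not part of the diff):

    # black >= 22.1 hugs ** when both operands are simple
    var = scale**2     # bare names: spaces removed
    y = 50 * (x**deg)  # hugged even inside a larger expression
    z = (x + 1) ** 2   # compound operand: spaces kept
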
2 changes: 1 addition & 1 deletion Makefile
@@ -1,6 +1,6 @@

install:
-pip install poetry && poetry install
+pip install poetry==1.2.1 && poetry install

package:
poetry build
7 changes: 6 additions & 1 deletion examples/simulations/regression_sim.py
@@ -29,7 +29,12 @@
Y = (X @ beta1 + args.noise_lvl * noise).squeeze()
print(X.shape, Y.shape)

-X_train, X_test = X[:1000, :], X[1000:,]
+X_train, X_test = (
+    X[:1000, :],
+    X[
+        1000:,
+    ],
+)
Y_train, Y_test = Y[:1000], Y[1000:]

ngb = NGBoost(
2 changes: 1 addition & 1 deletion figures/toy.py
@@ -20,7 +20,7 @@ def gen_data(n=50, bound=1, deg=3, beta=1, noise=0.9, intcpt=-1):
x = np.linspace(-bound, bound, n)[:, np.newaxis]
h = np.linspace(-bound, bound, n)[:, np.newaxis]
e = np.random.randn(*x.shape) * (0.1 + 10 * np.abs(x))
-    y = 50 * (x ** deg) + h * beta + noise * e + intcpt
+    y = 50 * (x**deg) + h * beta + noise * e + intcpt
return x, y.squeeze(), np.c_[h, np.ones_like(h)]


2 changes: 1 addition & 1 deletion figures/toy_single.py
@@ -20,7 +20,7 @@ def gen_data(n=50, bound=1, deg=3, beta=1, noise=0.9, intcpt=-1):
x = np.linspace(-bound, bound, n)[:, np.newaxis]
h = np.linspace(-bound, bound, n)[:, np.newaxis]
e = np.random.randn(*x.shape) * (0.1 + 10 * np.abs(x))
-    y = 50 * (x ** deg) + h * beta + noise * e + intcpt
+    y = 50 * (x**deg) + h * beta + noise * e + intcpt
return x, y.squeeze(), np.c_[h, np.ones_like(h)]


2 changes: 1 addition & 1 deletion ngboost/distns/laplace.py
@@ -18,7 +18,7 @@ def d_score(self, Y):

def metric(self):
FI = np.zeros((self.loc.shape[0], 2, 2))
-        FI[:, 0, 0] = 1 / self.scale ** 2
+        FI[:, 0, 0] = 1 / self.scale**2
FI[:, 1, 1] = 1
return FI

8 changes: 4 additions & 4 deletions ngboost/distns/lognormal.py
@@ -22,8 +22,8 @@ def d_score(self, Y):
Z = (lT - self.loc) / self.scale

D_uncens = np.zeros((self.loc.shape[0], 2))
-        D_uncens[:, 0] = (self.loc - lT) / (self.scale ** 2)
-        D_uncens[:, 1] = 1 - ((self.loc - lT) ** 2) / (self.scale ** 2)
+        D_uncens[:, 0] = (self.loc - lT) / (self.scale**2)
+        D_uncens[:, 1] = 1 - ((self.loc - lT) ** 2) / (self.scale**2)

D_cens = np.zeros((self.loc.shape[0], 2))
D_cens[:, 0] = -sp.stats.norm.pdf(lT, loc=self.loc, scale=self.scale) / (
@@ -39,7 +39,7 @@ def d_score(self, Y):

def metric(self):
FI = np.zeros((self.loc.shape[0], 2, 2))
-        FI[:, 0, 0] = 1 / (self.scale ** 2) + self.eps
+        FI[:, 0, 0] = 1 / (self.scale**2) + self.eps
FI[:, 1, 1] = 2
return FI

@@ -83,7 +83,7 @@ def d_score(self, Y):
def metric(self):
I = np.zeros((self.loc.shape[0], 2, 2))
I[:, 0, 0] = 2
-        I[:, 1, 1] = self.scale ** 2
+        I[:, 1, 1] = self.scale**2
I /= 2 * np.sqrt(np.pi)
return I

2 changes: 1 addition & 1 deletion ngboost/distns/normal.py
@@ -68,7 +68,7 @@ def __init__(self, params):
super().__init__(params)
self.loc = params[0]
self.scale = np.exp(params[1])
-        self.var = self.scale ** 2
+        self.var = self.scale**2
self.dist = dist(loc=self.loc, scale=self.scale)

def fit(Y):
4 changes: 2 additions & 2 deletions ngboost/distns/t.py
@@ -60,7 +60,7 @@ def __init__(self, params):
super().__init__(params)
self.loc = params[0]
self.scale = np.exp(params[1])
-        self.var = self.scale ** 2
+        self.var = self.scale**2
self.df = np.exp(params[2])
self.dist = dist(loc=self.loc, scale=self.scale, df=self.df)

@@ -123,7 +123,7 @@ def __init__(self, params):
super().__init__(params)
self.loc = params[0]
self.scale = np.exp(params[1])
-        self.var = self.scale ** 2
+        self.var = self.scale**2
# fixed df
self.df = np.ones_like(self.loc) * self.fixed_df
self.dist = dist(loc=self.loc, scale=self.scale, df=self.df)
4 changes: 2 additions & 2 deletions ngboost/distns/utils.py
@@ -26,8 +26,8 @@ class SurvivalDistn(Dist):

def fit(Y):
"""
-        Parameters:
-        Y : a object with keys {time, event}, each containing an array
+    Parameters:
+    Y : a object with keys {time, event}, each containing an array
"""
return Dist.fit(Y["Time"])

16 changes: 8 additions & 8 deletions pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "ngboost"
version = "0.3.13dev"
version = "0.3.14dev"
description = "Library for probabilistic predictions via gradient boosting."
authors = ["Stanford ML Group <[email protected]>"]
readme = "README.md"
@@ -13,21 +13,21 @@ classifiers = [
license = "Apache License 2.0"

[tool.poetry.dependencies]
python = ">=3.6.2, <4.0"
scikit-learn = ">=0.21"
numpy = ">=1.17"
scipy = ">=1.3"
python = ">=3.7.1, <3.11"
scikit-learn = ">=1.0.2"
numpy = ">=1.21.2"
scipy = ">=1.7.2"
tqdm = ">=4.3"
lifelines = ">=0.25"

pandas = ">=1.3.5"
flake8 = "^5.0.4"

[tool.poetry.dev-dependencies]
pytest = "^6.1.2"
black = "^20.8b1"
black = "^22.8.0"
pre-commit = "^2.0"
isort = "^5.6.4"
pylint = "^2.6.0"
flake8 = "^3.8.4"

[build-system]
requires = ["poetry-core>=1.0.0"]
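
For context: the new floors roughly line up with the first releases of these libraries to ship Python 3.10 wheels, matching the widened python constraint. A quick sanity check that a local environment satisfies the new minimums, as a sketch: it assumes Python 3.8+ for importlib.metadata and that the packaging library is importable (it ships with modern pip and Poetry environments), with the floor values copied from the diff above:

    from importlib.metadata import version  # Python 3.8+

    from packaging.version import Version

    # minimum versions from the updated pyproject.toml
    floors = {
        "scikit-learn": "1.0.2",
        "numpy": "1.21.2",
        "scipy": "1.7.2",
        "pandas": "1.3.5",
    }

    for pkg, floor in floors.items():
        assert Version(version(pkg)) >= Version(floor), f"{pkg} older than {floor}"
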
20 changes: 12 additions & 8 deletions tests/conftest.py
@@ -2,7 +2,7 @@

import numpy as np
import pytest
-from sklearn.datasets import load_boston, load_breast_cancer
+from sklearn.datasets import fetch_california_housing, load_breast_cancer
from sklearn.model_selection import train_test_split

Tuple4Array = Tuple[np.array, np.array, np.array, np.array]
@@ -23,27 +23,31 @@ def pytest_configure(config):


@pytest.fixture(scope="session")
-def boston_data() -> Tuple4Array:
-    X, Y = load_boston(True)
+def california_housing_data() -> Tuple4Array:
+    X, Y = fetch_california_housing(return_X_y=True)
return train_test_split(X, Y, test_size=0.2, random_state=23)


@pytest.fixture(scope="session")
-def boston_survival_data() -> Tuple5Array:
-    X, Y = load_boston(True)
+def california_housing_survival_data() -> Tuple5Array:
+    X, Y = fetch_california_housing(return_X_y=True)
X_surv_train, X_surv_test, Y_surv_train, Y_surv_test = train_test_split(
X, Y, test_size=0.2, random_state=14
)

+    # calculate threshold for censoring data
+    censor_threshold = np.quantile(Y_surv_train, 0.75)
# introduce administrative censoring to simulate survival data
-    T_surv_train = np.minimum(Y_surv_train, 30)  # time of an event or censoring
+    T_surv_train = np.minimum(
+        Y_surv_train, censor_threshold
+    )  # time of an event or censoring
E_surv_train = (
-        Y_surv_train > 30
+        Y_surv_train > censor_threshold
) # 1 if T[i] is the time of an event, 0 if it's a time of censoring
return X_surv_train, X_surv_test, T_surv_train, E_surv_train, Y_surv_test


@pytest.fixture(scope="session")
def breast_cancer_data() -> Tuple4Array:
-    X, Y = load_breast_cancer(True)
+    X, Y = load_breast_cancer(return_X_y=True)
return train_test_split(X, Y, test_size=0.2, random_state=12)
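
For context: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2, so the fixtures migrate to fetch_california_housing, which returns the same (X, y) regression shape but downloads and caches the dataset on first use. A standalone sketch of the new fixture logic, including the quantile-based administrative censoring (variable names here are illustrative):

    import numpy as np
    from sklearn.datasets import fetch_california_housing
    from sklearn.model_selection import train_test_split

    X, Y = fetch_california_housing(return_X_y=True)  # fetched and cached on first call
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.2, random_state=23
    )

    # administrative censoring: cap observed times at the 75th percentile
    censor_threshold = np.quantile(Y_train, 0.75)
    T_train = np.minimum(Y_train, censor_threshold)  # time of an event or censoring
    E_train = Y_train > censor_threshold  # event flag, mirroring the fixture above
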
4 changes: 2 additions & 2 deletions tests/test_basic.py
@@ -34,12 +34,12 @@ def test_classification(breast_cancer_data):


# TODO: This is non-deterministic in the model fitting
-def test_regression(boston_data):
+def test_regression(california_housing_data):
from sklearn.metrics import ( # pylint: disable=import-outside-toplevel
mean_squared_error,
)

-    x_train, x_test, y_train, y_test = boston_data
+    x_train, x_test, y_train, y_test = california_housing_data
ngb = NGBRegressor(verbose=False)
ngb.fit(x_train, y_train)
preds = ngb.predict(x_test)
22 changes: 12 additions & 10 deletions tests/test_distns.py
@@ -40,10 +40,8 @@
DecisionTreeRegressor(criterion="friedman_mse", max_depth=5),
],
)
-def test_dists_runs_on_examples_logscore(
-    dist: Distn, learner, boston_data: Tuple4Array
-):
-    X_train, X_test, y_train, y_test = boston_data
+def test_dists_runs_on_examples_logscore(dist: Distn, learner, california_housing_data):
+    X_train, X_test, y_train, y_test = california_housing_data
# TODO: test early stopping features
ngb = NGBRegressor(Dist=dist, Score=LogScore, Base=learner, verbose=False)
ngb.fit(X_train, y_train)
@@ -61,10 +61,8 @@ def test_dists_runs_on_examples_logscore(
DecisionTreeRegressor(criterion="friedman_mse", max_depth=5),
],
)
-def test_dists_runs_on_examples_crpscore(
-    dist: Distn, learner, boston_data: Tuple4Array
-):
-    X_train, X_test, y_train, y_test = boston_data
+def test_dists_runs_on_examples_crpscore(dist: Distn, learner, california_housing_data):
+    X_train, X_test, y_train, y_test = california_housing_data
# TODO: test early stopping features
ngb = NGBRegressor(Dist=dist, Score=CRPScore, Base=learner, verbose=False)
ngb.fit(X_train, y_train)
@@ -83,9 +79,15 @@
],
)
def test_survival_runs_on_examples(
-    dist: Distn, score: Score, learner, boston_survival_data: Tuple5Array
+    dist: Distn, score: Score, learner, california_housing_survival_data
):
-    X_train, X_test, T_surv_train, E_surv_train, Y_surv_test = boston_survival_data
+    (
+        X_train,
+        X_test,
+        T_surv_train,
+        E_surv_train,
+        Y_surv_test,
+    ) = california_housing_survival_data
# test early stopping features
ngb = NGBSurvival(Dist=dist, Score=score, Base=learner, verbose=False)
ngb.fit(X_train, T_surv_train, E_surv_train)
14 changes: 8 additions & 6 deletions tests/test_pickling.py
@@ -9,7 +9,9 @@

# name = learners_data to avoid pylint redefined-outer-name
@pytest.fixture(name="learners_data")
-def fixture_learners_data(breast_cancer_data, boston_data, boston_survival_data):
+def fixture_learners_data(
+    breast_cancer_data, california_housing_data, california_housing_survival_data
+):
"""
Returns:
A list of iterables,
@@ -23,26 +25,26 @@ def fixture_learners_data(breast_cancer_data, boston_data, boston_survival_data)
ngb.fit(X_class_train, Y_class_train)
models_data.append((ngb, X_class_train, ngb.predict(X_class_train)))

-    X_reg_train, _, Y_reg_train, _ = boston_data
+    X_reg_train, _, Y_reg_train, _ = california_housing_data
ngb = NGBRegressor(verbose=False, n_estimators=10)
ngb.fit(X_reg_train, Y_reg_train)
models_data.append((ngb, X_reg_train, ngb.predict(X_reg_train)))

-    X_surv_train, _, T_surv_train, E_surv_train, _ = boston_survival_data
+    X_surv_train, _, T_surv_train, E_surv_train, _ = california_housing_survival_data
ngb = NGBSurvival(verbose=False, n_estimators=10)
ngb.fit(X_surv_train, T_surv_train, E_surv_train)
models_data.append((ngb, X_surv_train, ngb.predict(X_surv_train)))

ngb = NGBRegressor(Dist=MultivariateNormal(2), n_estimators=10)
-    ngb.fit(X_surv_train, np.vstack([T_surv_train, E_surv_train]).T)
+    ngb.fit(X_surv_train, np.vstack((T_surv_train, E_surv_train)).T)
models_data.append((ngb, X_surv_train, ngb.predict(X_surv_train)))
return models_data


def test_model_save(learners_data):
"""
-        Tests that the model can be loaded and predict still works
-        It checks that the new predictions are the same as pre-pickling
+    Tests that the model can be loaded and predict still works
+    It checks that the new predictions are the same as pre-pickling
"""
for learner, data, preds in learners_data:
serial = pickle.dumps(learner)
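
The pickling test boils down to a serialize-reload-compare round trip. A minimal standalone version of that check (the synthetic dataset and model sizes are illustrative):

    import pickle

    import numpy as np
    from ngboost import NGBRegressor

    rng = np.random.default_rng(0)
    X = rng.normal(size=(100, 5))
    y = X @ np.ones(5) + 0.1 * rng.normal(size=100)

    ngb = NGBRegressor(verbose=False, n_estimators=10)
    ngb.fit(X, y)
    preds_before = ngb.predict(X)

    restored = pickle.loads(pickle.dumps(ngb))  # round trip through pickle
    np.testing.assert_array_equal(preds_before, restored.predict(X))
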
