From f3e4b21f86e399a9ef797ed653a8aaa9853c1eea Mon Sep 17 00:00:00 2001 From: Bazire Date: Tue, 26 Sep 2023 18:56:21 +0200 Subject: [PATCH] Activating session on fixture to try to speed up tests --- .../tests/fixtures/amazon_review__binary_classification.py | 4 ++-- python-client/tests/fixtures/diabetes__regression.py | 2 +- .../drug_classification__multiclass_classification.py | 4 ++-- .../tests/fixtures/enron_multilabel_classification.py | 4 ++-- .../tests/fixtures/fraud_detection__binary_classification.py | 4 ++-- python-client/tests/fixtures/german_credit_scoring.py | 4 ++-- python-client/tests/fixtures/hotel_text__regression.py | 4 ++-- .../fixtures/medical_transcript_multiclass_classification.py | 4 ++-- python-client/tests/fixtures/pytorch_sst2.py | 4 ++-- python-client/tests/fixtures/titanic.py | 2 +- .../tests/fixtures/tripadvisor_text_classification_torch.py | 4 ++-- python-client/tests/fixtures/xboost_classification.py | 4 ++-- 12 files changed, 22 insertions(+), 22 deletions(-) diff --git a/python-client/tests/fixtures/amazon_review__binary_classification.py b/python-client/tests/fixtures/amazon_review__binary_classification.py index b02ba9f685..10114cec08 100644 --- a/python-client/tests/fixtures/amazon_review__binary_classification.py +++ b/python-client/tests/fixtures/amazon_review__binary_classification.py @@ -56,7 +56,7 @@ def preprocess_data(df: pd.DataFrame) -> pd.DataFrame: return df -@pytest.fixture() +@pytest.fixture(scope="session") def amazon_review_data() -> Dataset: raw_data = preprocess_data(download_data(nrows=5000)) wrapped_data = Dataset( @@ -88,7 +88,7 @@ def tokenizer(x): return stems -@pytest.fixture() +@pytest.fixture(scope="session") def amazon_review_model(amazon_review_data: Dataset) -> SKLearnModel: x = amazon_review_data.df[[FEATURE_COLUMN_NAME]] y = amazon_review_data.df[TARGET_COLUMN_NAME] diff --git a/python-client/tests/fixtures/diabetes__regression.py b/python-client/tests/fixtures/diabetes__regression.py index e8fedf70b5..22e16e302e 100644 --- a/python-client/tests/fixtures/diabetes__regression.py +++ b/python-client/tests/fixtures/diabetes__regression.py @@ -8,7 +8,7 @@ from giskard.ml_worker.utils.logging import Timer -@pytest.fixture() +@pytest.fixture(scope="session") def linear_regression_diabetes_raw(): timer = Timer() diabetes = datasets.load_diabetes() diff --git a/python-client/tests/fixtures/drug_classification__multiclass_classification.py b/python-client/tests/fixtures/drug_classification__multiclass_classification.py index 8765ef2955..6412ce2391 100644 --- a/python-client/tests/fixtures/drug_classification__multiclass_classification.py +++ b/python-client/tests/fixtures/drug_classification__multiclass_classification.py @@ -47,7 +47,7 @@ def _bin_na_to_k(_df: pd.DataFrame) -> pd.DataFrame: return df -@pytest.fixture() +@pytest.fixture(scope="session") def drug_classification_data() -> Dataset: # Download data. fetch_from_ftp(DATA_URL, DATA_PATH) @@ -63,7 +63,7 @@ def drug_classification_data() -> Dataset: return wrapped_dataset -@pytest.fixture() +@pytest.fixture(scope="session") def drug_classification_model(drug_classification_data) -> SKLearnModel: x = drug_classification_data.df.drop(TARGET_NAME, axis=1) y = drug_classification_data.df.Drug diff --git a/python-client/tests/fixtures/enron_multilabel_classification.py b/python-client/tests/fixtures/enron_multilabel_classification.py index 2b9e9790a3..db07b76325 100644 --- a/python-client/tests/fixtures/enron_multilabel_classification.py +++ b/python-client/tests/fixtures/enron_multilabel_classification.py @@ -65,7 +65,7 @@ def get_labels(filename): return dict(labels) -@pytest.fixture() +@pytest.fixture(scope="session") def enron_data() -> Dataset: logger.info("Fetching Enron Data") df = pd.read_csv(path("test_data/enron_data.csv"), keep_default_na=False, na_values=["_GSK_NA_"]) @@ -127,7 +127,7 @@ def enron_test_data(enron_data): return Dataset(df=pd.DataFrame(enron_data.df).drop(columns=["Target"]), target=None, column_types=input_types) -@pytest.fixture() +@pytest.fixture(scope="session") def enron_model(enron_data) -> SKLearnModel: timer = Timer() diff --git a/python-client/tests/fixtures/fraud_detection__binary_classification.py b/python-client/tests/fixtures/fraud_detection__binary_classification.py index aff46911e3..6bbe4f3533 100644 --- a/python-client/tests/fixtures/fraud_detection__binary_classification.py +++ b/python-client/tests/fixtures/fraud_detection__binary_classification.py @@ -148,7 +148,7 @@ def fraud_detection_data() -> Dataset: return wrapped_dataset -@pytest.fixture() +@pytest.fixture(scope="session") def fraud_detection_train_data() -> Dataset: train_set, _ = preprocess_dataset(*read_dataset()) wrapped_dataset = Dataset( @@ -157,7 +157,7 @@ def fraud_detection_train_data() -> Dataset: return wrapped_dataset -@pytest.fixture() +@pytest.fixture(scope="session") def fraud_detection_model(fraud_detection_train_data: Dataset) -> Model: from lightgbm import LGBMClassifier diff --git a/python-client/tests/fixtures/german_credit_scoring.py b/python-client/tests/fixtures/german_credit_scoring.py index 9036386959..1b9d5814e2 100644 --- a/python-client/tests/fixtures/german_credit_scoring.py +++ b/python-client/tests/fixtures/german_credit_scoring.py @@ -43,7 +43,7 @@ } -@pytest.fixture() +@pytest.fixture(scope="session") def german_credit_data() -> Dataset: logger.info("Reading german_credit_prepared.csv") df = pd.read_csv(path("test_data/german_credit_prepared.csv"), keep_default_na=False, na_values=["_GSK_NA_"]) @@ -92,7 +92,7 @@ def german_credit_test_data(german_credit_data): return Dataset(df=df, target=None, column_types=input_types) -@pytest.fixture() +@pytest.fixture(scope="session") def german_credit_raw_model(german_credit_data): timer = Timer() diff --git a/python-client/tests/fixtures/hotel_text__regression.py b/python-client/tests/fixtures/hotel_text__regression.py index 4961981707..3db72a5a97 100644 --- a/python-client/tests/fixtures/hotel_text__regression.py +++ b/python-client/tests/fixtures/hotel_text__regression.py @@ -31,7 +31,7 @@ def load_data(**kwargs) -> pd.DataFrame: return df -@pytest.fixture +@pytest.fixture(scope="session") def hotel_text_data() -> Dataset: fetch_from_ftp(DATA_URL, DATA_PATH) @@ -56,7 +56,7 @@ def adapt_vectorizer_input(df: pd.DataFrame) -> Iterable: return df -@pytest.fixture +@pytest.fixture(scope="session") def hotel_text_model(hotel_text_data) -> SKLearnModel: x = hotel_text_data.df[[FEATURE_COLUMN_NAME]] y = hotel_text_data.df[TARGET_COLUMN_NAME] diff --git a/python-client/tests/fixtures/medical_transcript_multiclass_classification.py b/python-client/tests/fixtures/medical_transcript_multiclass_classification.py index bbf68b0d64..d984c9614f 100644 --- a/python-client/tests/fixtures/medical_transcript_multiclass_classification.py +++ b/python-client/tests/fixtures/medical_transcript_multiclass_classification.py @@ -56,7 +56,7 @@ def load_data() -> pd.DataFrame: return df -@pytest.fixture() +@pytest.fixture(scope="session") def medical_transcript_data() -> Dataset: raw_data = load_data() wrapped_data = Dataset( @@ -85,7 +85,7 @@ def adapt_vectorizer_input(df: pd.DataFrame) -> Iterable: return df -@pytest.fixture() +@pytest.fixture(scope="session") def medical_transcript_model(medical_transcript_data: Dataset) -> SKLearnModel: # Define final pipeline. pipeline = Pipeline( diff --git a/python-client/tests/fixtures/pytorch_sst2.py b/python-client/tests/fixtures/pytorch_sst2.py index 785bc8da3e..90187bd8ff 100644 --- a/python-client/tests/fixtures/pytorch_sst2.py +++ b/python-client/tests/fixtures/pytorch_sst2.py @@ -13,7 +13,7 @@ from tests.utils import resource_dir -@pytest.fixture() +@pytest.fixture(scope="session") def sst2_dev_data(): dev_datapipe = SST2(split="dev") return pd.DataFrame(dev_datapipe, columns=["text", "label"]) @@ -24,7 +24,7 @@ def sst2_data(sst2_dev_data): return Dataset(sst2_dev_data.head(), name="test dataset", target="label") -@pytest.fixture() +@pytest.fixture(scope="session") def sst2_model(sst2_dev_data): torch_softmax = nn.Softmax(dim=1) device = "cuda" if torch.cuda.is_available() else "cpu" diff --git a/python-client/tests/fixtures/titanic.py b/python-client/tests/fixtures/titanic.py index f1dccd1232..fedacfcc10 100644 --- a/python-client/tests/fixtures/titanic.py +++ b/python-client/tests/fixtures/titanic.py @@ -4,7 +4,7 @@ from giskard.demo import titanic -@pytest.fixture() +@pytest.fixture(scope="session") def titanic_model_data_raw(): return titanic() diff --git a/python-client/tests/fixtures/tripadvisor_text_classification_torch.py b/python-client/tests/fixtures/tripadvisor_text_classification_torch.py index 2468e57af7..157a575c82 100644 --- a/python-client/tests/fixtures/tripadvisor_text_classification_torch.py +++ b/python-client/tests/fixtures/tripadvisor_text_classification_torch.py @@ -201,7 +201,7 @@ def model_predict(self, df: pd.DataFrame) -> np.ndarray: return predicted_probabilities -@pytest.fixture() +@pytest.fixture(scope="session") def tripadvisor_data() -> Dataset: # Download dataset df = load_dataset() @@ -210,7 +210,7 @@ def tripadvisor_data() -> Dataset: ) -@pytest.fixture() +@pytest.fixture(scope="session") def tripadvisor_model(tripadvisor_data: Dataset) -> Model: # Load model. model = DistilBertForSequenceClassification.from_pretrained( diff --git a/python-client/tests/fixtures/xboost_classification.py b/python-client/tests/fixtures/xboost_classification.py index 316394f1d6..912fecdcf2 100644 --- a/python-client/tests/fixtures/xboost_classification.py +++ b/python-client/tests/fixtures/xboost_classification.py @@ -16,7 +16,7 @@ TARGET_COLUMN_NAME = "target" -@pytest.fixture() +@pytest.fixture(scope="session") def breast_cancer_data() -> Dataset: logger.info("Fetching Breast Cancer Data") raw_data = load_breast_cancer(as_frame=True) @@ -25,7 +25,7 @@ def breast_cancer_data() -> Dataset: return Dataset(df, name="breast_cancer", target="target", column_types=column_types) -@pytest.fixture() +@pytest.fixture(scope="session") def breast_cancer_model(breast_cancer_data: Dataset) -> Model: X_train, X_test, y_train, y_test = train_test_split( breast_cancer_data.df.loc[:, breast_cancer_data.df.columns != TARGET_COLUMN_NAME],