From 613fa58ff9d1f06877ad8790a145d91f5913f862 Mon Sep 17 00:00:00 2001
From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Tue, 20 Feb 2024 07:53:05 -0800
Subject: [PATCH] fix spark cluster start mechanism and add extra dev
 requirements (#986)

---
 dagger/run_dbt_spark_tests.py | 18 +++++++++++++++---
 dev-requirements.txt          |  3 +++
 tests/functional/conftest.py  | 28 ++++++++++++++++++----------
 3 files changed, 36 insertions(+), 13 deletions(-)

diff --git a/dagger/run_dbt_spark_tests.py b/dagger/run_dbt_spark_tests.py
index 436cb1e92..15f9cf2c2 100644
--- a/dagger/run_dbt_spark_tests.py
+++ b/dagger/run_dbt_spark_tests.py
@@ -112,15 +112,27 @@ async def test_spark(test_args):
         .with_exec(["./scripts/install_os_reqs.sh"])
         # install dbt-spark + python deps
         .with_directory("/src", req_files)
-        .with_directory("src/dbt", dbt_spark_dir)
-        .with_directory("src/tests", test_dir)
-        .with_workdir("/src")
         .with_exec(["pip", "install", "-U", "pip"])
+        .with_workdir("/src")
         .with_exec(["pip", "install", "-r", "requirements.txt"])
         .with_exec(["pip", "install", "-r", "dev-requirements.txt"])
+    )
+
+    # install local dbt-spark changes
+    tst_container = (
+        tst_container.with_workdir("/")
+        .with_directory("src/dbt", dbt_spark_dir)
+        .with_workdir("/src")
         .with_exec(["pip", "install", "-e", "."])
     )
 
+    # install local test changes
+    tst_container = (
+        tst_container.with_workdir("/")
+        .with_directory("src/tests", test_dir)
+        .with_workdir("/src")
+    )
+
     if test_profile == "apache_spark":
         spark_ctr, spark_host = get_spark_container(client)
         tst_container = tst_container.with_service_binding(alias=spark_host, service=spark_ctr)
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 28a626fc3..8f674d84b 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -1,5 +1,8 @@
 # install latest changes in dbt-core
 # TODO: how to automate switching from develop to version branches?
+git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core
+git+https://github.com/dbt-labs/dbt-common.git
+git+https://github.com/dbt-labs/dbt-adapters.git
 git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter
 
 # if version 1.x or greater -> pin to major version
diff --git a/tests/functional/conftest.py b/tests/functional/conftest.py
index c1a0397bd..476ffb474 100644
--- a/tests/functional/conftest.py
+++ b/tests/functional/conftest.py
@@ -1,19 +1,27 @@
-from multiprocessing import Lock
-
+import time
 import pytest
 
-_db_start_lock = Lock()
-_DB_CLUSTER_STARTED = False
+
+def _wait_for_databricks_cluster(project):
+    """
+    It takes roughly 3min for the cluster to start; to be safe we'll wait up to 10min
+    """
+    for _ in range(60):
+        try:
+            project.run_sql("SELECT 1", fetch=True)
+            return
+        except Exception:
+            time.sleep(10)
+
+    raise Exception("Databricks cluster did not start in time")
 
 
 # Running this should prevent tests from needing to be retried because the Databricks cluster isn't available
 @pytest.fixture(scope="class", autouse=True)
 def start_databricks_cluster(project, request):
-    global _DB_CLUSTER_STARTED
     profile_type = request.config.getoption("--profile")
-    with _db_start_lock:
-        if "databricks" in profile_type and not _DB_CLUSTER_STARTED:
-            print("Starting Databricks cluster")
-            project.run_sql("SELECT 1")
-            _DB_CLUSTER_STARTED = True
+    if "databricks" in profile_type:
+        _wait_for_databricks_cluster(project)
+
+    yield 1
 
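
Note on the tests/functional/conftest.py change: the new _wait_for_databricks_cluster helper polls the warehouse with a trivial query, sleeping between attempts, and raises once its retries are exhausted. Below is a minimal, generalized sketch of that poll-until-ready pattern; it is not part of the patch, and the wait_until name, the probe callable, and the timeout_s/interval_s defaults are illustrative assumptions (a deadline-based variant of the fixed 60 x 10s loop committed above).

import time


def wait_until(probe, timeout_s=600, interval_s=10):
    """Call `probe` repeatedly until it succeeds or `timeout_s` elapses.

    `probe` is any zero-argument callable that raises while the resource is
    still unavailable (in the patch above: running SELECT 1 against the
    Databricks cluster).
    """
    deadline = time.monotonic() + timeout_s
    while time.monotonic() < deadline:
        try:
            probe()
            return
        except Exception:
            time.sleep(interval_s)
    raise TimeoutError(f"resource did not become ready within {timeout_s}s")


# Usage mirroring the fixture in the diff (the `project` object comes from
# dbt's functional-testing fixtures and is shown here only for illustration):
# wait_until(lambda: project.run_sql("SELECT 1", fetch=True))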