Skip to content

Commit

Permalink
fix spark cluster start mechanism and add extra dev requirements (#986)
Browse files Browse the repository at this point in the history
  • Loading branch information
colin-rogers-dbt authored Feb 20, 2024
1 parent 5d90ff9 commit 613fa58
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 13 deletions.
18 changes: 15 additions & 3 deletions dagger/run_dbt_spark_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,15 +112,27 @@ async def test_spark(test_args):
.with_exec(["./scripts/install_os_reqs.sh"])
# install dbt-spark + python deps
.with_directory("/src", req_files)
.with_directory("src/dbt", dbt_spark_dir)
.with_directory("src/tests", test_dir)
.with_workdir("/src")
.with_exec(["pip", "install", "-U", "pip"])
.with_workdir("/src")
.with_exec(["pip", "install", "-r", "requirements.txt"])
.with_exec(["pip", "install", "-r", "dev-requirements.txt"])
)

# install local dbt-spark changes
tst_container = (
tst_container.with_workdir("/")
.with_directory("src/dbt", dbt_spark_dir)
.with_workdir("/src")
.with_exec(["pip", "install", "-e", "."])
)

# install local test changes
tst_container = (
tst_container.with_workdir("/")
.with_directory("src/tests", test_dir)
.with_workdir("/src")
)

if test_profile == "apache_spark":
spark_ctr, spark_host = get_spark_container(client)
tst_container = tst_container.with_service_binding(alias=spark_host, service=spark_ctr)
Expand Down
3 changes: 3 additions & 0 deletions dev-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# install latest changes in dbt-core
# TODO: how to automate switching from develop to version branches?
git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core
git+https://github.com/dbt-labs/dbt-common.git
git+https://github.com/dbt-labs/dbt-adapters.git
git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter

# if version 1.x or greater -> pin to major version
Expand Down
28 changes: 18 additions & 10 deletions tests/functional/conftest.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,27 @@
from multiprocessing import Lock

import time
import pytest

_db_start_lock = Lock()
_DB_CLUSTER_STARTED = False

def _wait_for_databricks_cluster(project):
"""
It takes roughly 3min for the cluster to start, to be safe we'll wait for 5min
"""
for _ in range(60):
try:
project.run_sql("SELECT 1", fetch=True)
return
except Exception:
time.sleep(10)

raise Exception("Databricks cluster did not start in time")


# Running this should prevent tests from needing to be retried because the Databricks cluster isn't available
# NOTE(review): the lines below appear to interleave the REMOVED implementation
# (global flag + lock + direct "SELECT 1" warm-up) with the ADDED one (the
# unconditional _wait_for_databricks_cluster call) from this commit's diff;
# verify against the upstream commit (613fa58) before treating this span as
# a single coherent function. Indentation was lost in extraction.
@pytest.fixture(scope="class", autouse=True)
def start_databricks_cluster(project, request):
# Old implementation (removed): one-shot warm-up guarded by a process-wide
# lock and a module-level "already started" flag.
global _DB_CLUSTER_STARTED
profile_type = request.config.getoption("--profile")
with _db_start_lock:
if "databricks" in profile_type and not _DB_CLUSTER_STARTED:
print("Starting Databricks cluster")
project.run_sql("SELECT 1")

_DB_CLUSTER_STARTED = True
# New implementation (added): always poll until the cluster responds,
# per-class, via the helper defined above in the same diff hunk.
if "databricks" in profile_type:
_wait_for_databricks_cluster(project)

# Fixture yields a dummy value; tests only rely on the setup side effect.
yield 1

0 comments on commit 613fa58

Please sign in to comment.