From 4bede65d0fe93debcfd90b8172bd3eaf8597961e Mon Sep 17 00:00:00 2001 From: ismail simsek <6005685+ismailsimsek@users.noreply.github.com> Date: Tue, 17 Dec 2024 00:11:31 +0100 Subject: [PATCH] [Feature] Enable cross project dbt ref support, `dbt mesh`, `multi porject dbt setup` (#49) * Enable multi project references and settings Revert "Enable multi project references and settings" This reverts commit 2686a198b2e02c16a3ab0e4e356851407e342f44. * Enable multi project references and settings * Enable multi project references and settings --- opendbt/dbt/__init__.py | 4 ++ opendbt/dbt/v17/config/__init__.py | 0 opendbt/dbt/v17/config/runtime.py | 58 +++++++++++++++++++ opendbt/dbt/v18/config/__init__.py | 0 opendbt/dbt/v18/config/runtime.py | 52 +++++++++++++++++ tests/resources/dbtcore/profiles.yml | 2 +- tests/resources/dbtfinance/.gitignore | 4 ++ tests/resources/dbtfinance/dbt_project.yml | 9 +++ tests/resources/dbtfinance/dependencies.yml | 7 +++ .../models/my_cross_project_ref_model.sql | 2 + tests/resources/dbtfinance/profiles.yml | 15 +++++ tests/test_opendbt_cli.py | 6 ++ 12 files changed, 158 insertions(+), 1 deletion(-) create mode 100644 opendbt/dbt/v17/config/__init__.py create mode 100644 opendbt/dbt/v17/config/runtime.py create mode 100644 opendbt/dbt/v18/config/__init__.py create mode 100644 opendbt/dbt/v18/config/runtime.py create mode 100644 tests/resources/dbtfinance/.gitignore create mode 100644 tests/resources/dbtfinance/dbt_project.yml create mode 100644 tests/resources/dbtfinance/dependencies.yml create mode 100644 tests/resources/dbtfinance/models/my_cross_project_ref_model.sql create mode 100644 tests/resources/dbtfinance/profiles.yml diff --git a/opendbt/dbt/__init__.py b/opendbt/dbt/__init__.py index 758423d..43bef51 100644 --- a/opendbt/dbt/__init__.py +++ b/opendbt/dbt/__init__.py @@ -9,6 +9,8 @@ def patch_dbt(): # ================================================================================================================ dbt_version = Version(version.get_installed_version().to_version_string(skip_matcher=True)) if Version("1.6.0") <= dbt_version < Version("1.8.0"): + from opendbt.dbt.v17.config.runtime import OpenDbtRuntimeConfig + dbt.config.RuntimeConfig = OpenDbtRuntimeConfig from opendbt.dbt.v17.task.docs.generate import OpenDbtGenerateTask dbt.task.generate.GenerateTask = OpenDbtGenerateTask from opendbt.dbt.v17.adapters.factory import OpenDbtAdapterContainer @@ -16,6 +18,8 @@ def patch_dbt(): from opendbt.dbt.v17.task.run import ModelRunner dbt.task.run.ModelRunner = ModelRunner elif Version("1.8.0") <= dbt_version < Version("1.10.0"): + from opendbt.dbt.v18.config.runtime import OpenDbtRuntimeConfig + dbt.config.RuntimeConfig = OpenDbtRuntimeConfig from opendbt.dbt.v18.task.docs.generate import OpenDbtGenerateTask dbt.task.docs.generate.GenerateTask = OpenDbtGenerateTask from opendbt.dbt.v18.adapters.factory import OpenDbtAdapterContainer diff --git a/opendbt/dbt/v17/config/__init__.py b/opendbt/dbt/v17/config/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/opendbt/dbt/v17/config/runtime.py b/opendbt/dbt/v17/config/runtime.py new file mode 100644 index 0000000..6e7921c --- /dev/null +++ b/opendbt/dbt/v17/config/runtime.py @@ -0,0 +1,58 @@ +from dataclasses import dataclass +from pathlib import Path +from typing import Mapping + +from dbt.config import RuntimeConfig +from dbt.config.project import path_exists, _load_yaml +from dbt.constants import DEPENDENCIES_FILE_NAME +from dbt.exceptions import ( + DbtProjectError, NonUniquePackageNameError, +) +from typing_extensions import override + + +def load_yml_dict(file_path): + ret = {} + if path_exists(file_path): + ret = _load_yaml(file_path) or {} + return ret + +# pylint: disable=too-many-ancestors +@dataclass +class OpenDbtRuntimeConfig(RuntimeConfig): + def load_dependence_projects(self): + dependencies_yml_dict = load_yml_dict(f"{self.project_root}/{DEPENDENCIES_FILE_NAME}") + + if "projects" not in dependencies_yml_dict: + return + + projects = dependencies_yml_dict["projects"] + project_root_parent = Path(self.project_root).parent + for project in projects: + path = project_root_parent.joinpath(project['name']) + try: + project = self.new_project(str(path.as_posix())) + except DbtProjectError as e: + raise DbtProjectError( + f"Failed to read depending project: {e} \n project path:{path.as_posix()}", + result_type="invalid_project", + path=path, + ) from e + + yield project.project_name, project + + @override + def load_dependencies(self, base_only=False) -> Mapping[str, "RuntimeConfig"]: + # if self.dependencies is None: + + if self.dependencies is None: + # this sets self.dependencies variable! + self.dependencies = super().load_dependencies(base_only=base_only) + + # additionally load `projects` defined in `dependencies.yml` + for project_name, project in self.load_dependence_projects(): + if project_name in self.dependencies: + raise NonUniquePackageNameError(project_name) + self.dependencies[project_name] = project + + return self.dependencies diff --git a/opendbt/dbt/v18/config/__init__.py b/opendbt/dbt/v18/config/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/opendbt/dbt/v18/config/runtime.py b/opendbt/dbt/v18/config/runtime.py new file mode 100644 index 0000000..3b48cf8 --- /dev/null +++ b/opendbt/dbt/v18/config/runtime.py @@ -0,0 +1,52 @@ +from dataclasses import dataclass +from pathlib import Path +from typing import Mapping + +from dbt.config import RuntimeConfig +from dbt.config.project import load_yml_dict +from dbt.constants import DEPENDENCIES_FILE_NAME +from dbt.exceptions import ( + DbtProjectError, NonUniquePackageNameError, +) +from typing_extensions import override + + +# pylint: disable=too-many-ancestors +@dataclass +class OpenDbtRuntimeConfig(RuntimeConfig): + def load_dependence_projects(self): + dependencies_yml_dict = load_yml_dict(f"{self.project_root}/{DEPENDENCIES_FILE_NAME}") + + if "projects" not in dependencies_yml_dict: + return + + projects = dependencies_yml_dict["projects"] + project_root_parent = Path(self.project_root).parent + for project in projects: + path = project_root_parent.joinpath(project['name']) + try: + project = self.new_project(str(path.as_posix())) + except DbtProjectError as e: + raise DbtProjectError( + f"Failed to read depending project: {e} \n project path:{path.as_posix()}", + result_type="invalid_project", + path=path, + ) from e + + yield project.project_name, project + + @override + def load_dependencies(self, base_only=False) -> Mapping[str, "RuntimeConfig"]: + # if self.dependencies is None: + + if self.dependencies is None: + # this sets self.dependencies variable! + self.dependencies = super().load_dependencies(base_only=base_only) + + # additionally load `projects` defined in `dependencies.yml` + for project_name, project in self.load_dependence_projects(): + if project_name in self.dependencies: + raise NonUniquePackageNameError(project_name) + self.dependencies[project_name] = project + + return self.dependencies diff --git a/tests/resources/dbtcore/profiles.yml b/tests/resources/dbtcore/profiles.yml index e8b48b0..6f5d832 100644 --- a/tests/resources/dbtcore/profiles.yml +++ b/tests/resources/dbtcore/profiles.yml @@ -3,7 +3,7 @@ dbtcore: dev: type: duckdb adapter: my.dbt.custom.OpenAdapterXXX - path: dev.duckdb + path: ./../dev.duckdb threads: 1 prod: diff --git a/tests/resources/dbtfinance/.gitignore b/tests/resources/dbtfinance/.gitignore new file mode 100644 index 0000000..49f147c --- /dev/null +++ b/tests/resources/dbtfinance/.gitignore @@ -0,0 +1,4 @@ + +target/ +dbt_packages/ +logs/ diff --git a/tests/resources/dbtfinance/dbt_project.yml b/tests/resources/dbtfinance/dbt_project.yml new file mode 100644 index 0000000..a72dc87 --- /dev/null +++ b/tests/resources/dbtfinance/dbt_project.yml @@ -0,0 +1,9 @@ +name: 'dbtfinance' +version: '1.0.0' + +profile: 'dbtfinance' + +# directories to be removed by `dbt clean` +clean-targets: + - "target" + - "dbt_packages" \ No newline at end of file diff --git a/tests/resources/dbtfinance/dependencies.yml b/tests/resources/dbtfinance/dependencies.yml new file mode 100644 index 0000000..146d44a --- /dev/null +++ b/tests/resources/dbtfinance/dependencies.yml @@ -0,0 +1,7 @@ +#packages: +# - package: dbt-labs/dbt_utils +# version: 1.1.1 + +# case-sensitive and matches the 'name' in the 'dbt_project.yml' +projects: + - name: dbtcore \ No newline at end of file diff --git a/tests/resources/dbtfinance/models/my_cross_project_ref_model.sql b/tests/resources/dbtfinance/models/my_cross_project_ref_model.sql new file mode 100644 index 0000000..7a86e47 --- /dev/null +++ b/tests/resources/dbtfinance/models/my_cross_project_ref_model.sql @@ -0,0 +1,2 @@ + +select * from {{ ref('dbtcore', 'my_core_table1') }} diff --git a/tests/resources/dbtfinance/profiles.yml b/tests/resources/dbtfinance/profiles.yml new file mode 100644 index 0000000..bd4f8df --- /dev/null +++ b/tests/resources/dbtfinance/profiles.yml @@ -0,0 +1,15 @@ +dbtfinance: + outputs: + dev: + type: duckdb + adapter: my.dbt.custom.OpenAdapterXXX + path: ./../dev.duckdb + threads: 1 + + prod: + type: duckdb + adapter: my.dbt.custom.OpenAdapterXXX + path: prod.duckdb + threads: 4 + + target: dev diff --git a/tests/test_opendbt_cli.py b/tests/test_opendbt_cli.py index 24849f9..4e4365b 100644 --- a/tests/test_opendbt_cli.py +++ b/tests/test_opendbt_cli.py @@ -37,3 +37,9 @@ def test_cli_run_models(self): dp = OpenDbtCli(project_dir=self.DBTCORE_DIR) dp.invoke(args=['run', '--select', 'my_first_dbt_model+', "--exclude", "my_failing_dbt_model", "--profiles-dir", dp.project_dir.as_posix()]) + + def test_cli_run_cross_project_ref_models(self): + dpf = OpenDbtCli(project_dir=self.DBTFINANCE_DIR) + dpc = OpenDbtCli(project_dir=self.DBTCORE_DIR) + dpc.invoke(args=['run', '--select', 'my_core_table1', "--profiles-dir", dpc.project_dir.as_posix()]) + dpf.invoke(args=['run', '--select', 'my_cross_project_ref_model', "--profiles-dir", dpf.project_dir.as_posix()])