Skip to content

Commit

Permalink
Overwriting rendered column descriptions with the unrendered yaml (#129)
Browse files Browse the repository at this point in the history
* Overwriting rendered column descriptions with the unrendered yaml

* added sourceDefinition path

* update manifest to have real destinations

* change to same method dbt-core uses

* after linter

* Moves docs block in orders to the seed file to test propagation

* create click option for use_direct_yaml_descriptions

* Update get_prior_knowledge function to include "seed" as a valid progenitor

* add test

* rename use_direct_yaml_descriptions to use_unrendered_descriptions

* Add typing for _get_member_yaml
return None on unknown ManifestNode

* add use_unrendered_descriptions to refactor

* linting

* add duckdb to the poetry install to parse manifest

* dynamically parse demo_duckdb at test time

* delete testing manifest.json due to dynamically parsing

* run dbt through poetry

* linter
  • Loading branch information
VDFaller authored Mar 28, 2024
1 parent 97767b4 commit 1c16714
Show file tree
Hide file tree
Showing 9 changed files with 138 additions and 11,356 deletions.
6 changes: 5 additions & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,12 @@ jobs:
poetry --version
- name: Install required packages
run: | # install duckdb extras to be able to parse manifest
poetry install -E duckdb
- name: Parse manifest
run: |
poetry install
poetry run dbt parse --project-dir demo_duckdb --profiles-dir demo_duckdb -t test
- name: Run pytest
run: |
Expand Down
1 change: 0 additions & 1 deletion demo_duckdb/models/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ models:
description: Date (UTC) that the order was placed

- name: status
description: '{{ doc("orders_status") }}'
tests:
- accepted_values:
values: ['placed', 'shipped', 'completed', 'return_pending', 'returned']
Expand Down
6 changes: 6 additions & 0 deletions demo_duckdb/seeds/schema.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
version: 2
seeds:
- name: raw_orders
columns:
- name: status
description: '{{ doc("orders_status") }}' # putting this in to test if unrendered propagation works
6 changes: 5 additions & 1 deletion src/dbt_osmosis/core/column_level_knowledge.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,11 @@ def get_prior_knowledge(
)
)
sorted_prior_knowledge_candidates_sources = sorted(
[k for k in prior_knowledge_candidates if k["progenitor"].startswith("source")],
[
k
for k in prior_knowledge_candidates
if (k["progenitor"].startswith("source") or k["progenitor"].startswith("seed"))
],
key=lambda k: k["generation"],
reverse=True,
)
Expand Down
54 changes: 53 additions & 1 deletion src/dbt_osmosis/core/column_level_knowledge_propagator.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional

import yaml
from dbt.contracts.graph.nodes import ModelNode, SeedNode, SourceDefinition

from dbt_osmosis.core.column_level_knowledge import (
ColumnLevelKnowledge,
Knowledge,
Expand Down Expand Up @@ -36,10 +40,37 @@ def _build_node_ancestor_tree(
return family_tree


def _get_member_yaml(member: ManifestNode, project_dir: Path) -> Optional[dict]:
    """Return the raw (unrendered) yaml entry for ``member`` from its schema file.

    The schema file is located through the member itself: ``original_file_path``
    for a ``SourceDefinition`` and ``patch_path`` for a ``ModelNode`` or
    ``SeedNode``. Only the entry whose ``name`` matches the member is returned,
    not the whole file.

    Args:
        member: The manifest node or source whose yaml entry is wanted.
        project_dir: Directory that the member's yaml path is relative to.

    Returns:
        The matching ``tables`` / ``models`` / ``seeds`` entry, or ``None`` when
        the member is of an unsupported type, has no yaml path recorded, or no
        matching entry exists in the file (including an empty file).

    Raises:
        OSError: If the yaml file cannot be opened.
    """
    if isinstance(member, SourceDefinition):
        key = "tables"
        relative_path = getattr(member, "original_file_path", None)
    elif isinstance(member, ModelNode):
        key = "models"
        relative_path = getattr(member, "patch_path", None)
    elif isinstance(member, SeedNode):
        key = "seeds"
        relative_path = getattr(member, "patch_path", None)
    else:
        return None

    if not relative_path:
        return None
    if key != "tables":
        # Drop everything up to and including "://" so only the file path remains.
        relative_path = relative_path.split("://")[-1]

    # Path(...) tolerates callers that hand in project_dir as a plain string.
    with (Path(project_dir) / relative_path).open("r", encoding="utf-8") as f:
        data = yaml.safe_load(f)  # None for an empty file

    if key == "tables":
        # Tables are nested one level down, under the source they belong to.
        sources = data.get("sources") if isinstance(data, dict) else None
        data = next(
            (item for item in sources or [] if item.get("name") == member.source_name),
            None,
        )

    if not isinstance(data, dict):
        return None
    return next(
        (item for item in data.get(key) or [] if item.get("name") == member.name),
        None,
    )


def _inherit_column_level_knowledge(
manifest: ManifestNode,
family_tree: Dict[str, Any],
placeholders: List[str],
project_dir: Path = Path.cwd(),
use_unrendered_descriptions: bool = False,
) -> Knowledge:
"""Inherit knowledge from ancestors in reverse insertion order to ensure that the most
recent ancestor is always the one to inherit from
Expand All @@ -50,10 +81,27 @@ def _inherit_column_level_knowledge(
member: ManifestNode = manifest.nodes.get(ancestor, manifest.sources.get(ancestor))
if not member:
continue
if use_unrendered_descriptions:
# overwrite member as the yaml
model_yaml = _get_member_yaml(member, project_dir)
for name, info in member.columns.items():
knowledge_default = {"progenitor": ancestor, "generation": generation}
knowledge.setdefault(name, knowledge_default)
deserialized_info = info.to_dict()
if (
use_unrendered_descriptions and model_yaml
): # overwrite the deserialized info with unrendered column info
col_yaml = next(
(
col
for col in model_yaml["columns"]
if col["name"] == deserialized_info["name"]
),
None,
)
if col_yaml is not None and "description" in col_yaml:
deserialized_info["description"] = col_yaml["description"]

# Handle Info:
# 1. tags are additive
# 2. descriptions are overriden
Expand Down Expand Up @@ -83,10 +131,14 @@ def get_node_columns_with_inherited_knowledge(
manifest: ManifestNode,
node: ManifestNode,
placeholders: List[str],
project_dir: Path = Path.cwd(),
use_unrendered_descriptions: bool = False,
) -> Knowledge:
"""Build a knowledgebase for the model based on iterating through ancestors"""
family_tree = _build_node_ancestor_tree(manifest, node)
knowledge = _inherit_column_level_knowledge(manifest, family_tree, placeholders)
knowledge = _inherit_column_level_knowledge(
manifest, family_tree, placeholders, project_dir, use_unrendered_descriptions
)
return knowledge

@staticmethod
Expand Down
8 changes: 7 additions & 1 deletion src/dbt_osmosis/core/osmosis.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ def __init__(
skip_merge_meta: bool = False,
add_progenitor_to_meta: bool = False,
vars: Optional[str] = None,
use_unrendered_descriptions: bool = False,
profile: Optional[str] = None,
):
"""Initializes the DbtYamlManager class."""
Expand All @@ -117,6 +118,7 @@ def __init__(
self.skip_add_tags = skip_add_tags
self.skip_merge_meta = skip_merge_meta
self.add_progenitor_to_meta = add_progenitor_to_meta
self.use_unrendered_descriptions = use_unrendered_descriptions

if len(list(self.filtered_models())) == 0:
logger().warning(
Expand Down Expand Up @@ -1057,7 +1059,11 @@ def update_schema_file_and_node(
)

knowledge = ColumnLevelKnowledgePropagator.get_node_columns_with_inherited_knowledge(
self.manifest, node, self.placeholders
self.manifest,
node,
self.placeholders,
self.base_config.project_dir,
self.use_unrendered_descriptions,
)
n_cols_doc_inherited = (
ColumnLevelKnowledgePropagator.update_undocumented_columns_with_prior_knowledge(
Expand Down
20 changes: 20 additions & 0 deletions src/dbt_osmosis/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,14 @@ def wrapper(*args, **kwargs):
" my_value}'"
),
)
@click.option(
"--use-unrendered-descriptions",
is_flag=True,
help=(
"If specified, will use unrendered column descriptions in the documentation."
"This is useful for propogating docs blocks"
),
)
@click.argument("models", nargs=-1)
def refactor(
target: Optional[str] = None,
Expand All @@ -181,6 +189,7 @@ def refactor(
models: Optional[List[str]] = None,
profile: Optional[str] = None,
vars: Optional[str] = None,
use_unrendered_descriptions: bool = False,
):
"""Executes organize which syncs yaml files with database schema and organizes the dbt models
directory, reparses the project, then executes document passing down inheritable documentation
Expand Down Expand Up @@ -210,6 +219,7 @@ def refactor(
add_progenitor_to_meta=add_progenitor_to_meta,
profile=profile,
vars=vars,
use_unrendered_descriptions=use_unrendered_descriptions,
)

# Conform project structure & bootstrap undocumented models injecting columns
Expand Down Expand Up @@ -417,6 +427,14 @@ def organize(
" my_value}'"
),
)
@click.option(
"--use-unrendered-descriptions",
is_flag=True,
help=(
"If specified, will use unrendered column descriptions in the documentation."
"This is useful for propogating docs blocks"
),
)
@click.argument("models", nargs=-1)
def document(
target: Optional[str] = None,
Expand All @@ -434,6 +452,7 @@ def document(
add_progenitor_to_meta: bool = False,
profile: Optional[str] = None,
vars: Optional[str] = None,
use_unrendered_descriptions: bool = False,
):
"""Column level documentation inheritance for existing models
Expand Down Expand Up @@ -462,6 +481,7 @@ def document(
add_progenitor_to_meta=add_progenitor_to_meta,
profile=profile,
vars=vars,
use_unrendered_descriptions=use_unrendered_descriptions,
)

# Propagate documentation & inject/remove schema file columns to align with model in database
Expand Down
Loading

0 comments on commit 1c16714

Please sign in to comment.