Skip to content

Commit

Permalink
Merge references
Browse files Browse the repository at this point in the history
  • Loading branch information
steinitzu committed Oct 9, 2024
1 parent 0e71a7f commit 22f70c8
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 11 deletions.
18 changes: 12 additions & 6 deletions dlt/common/schema/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,18 +560,24 @@ def normalize_table_identifiers(table: TTableSchema, naming: NamingConvention) -
else:
new_columns[new_col_name] = c
table["columns"] = new_columns
reference = table.get("references")
if reference:
new_references = []
for ref in reference:
references = table.get("references")
if references:
new_references = {}
for ref in references:
new_ref = copy(ref)
new_ref["referenced_table"] = naming.normalize_tables_path(ref["referenced_table"])
new_ref["columns"] = [naming.normalize_path(c) for c in ref["columns"]]
new_ref["referenced_columns"] = [
naming.normalize_path(c) for c in ref["referenced_columns"]
]
new_references.append(new_ref)
table["references"] = new_references
if new_ref["referenced_table"] in new_references:
logger.warning(
f"In schema {naming} table {table['name']} has multiple references to"
f" {new_ref['referenced_table']}. Only the last one is preserved."
)
new_references[new_ref["referenced_table"]] = new_ref

table["references"] = list(new_references.values())
return table


Expand Down
9 changes: 7 additions & 2 deletions dlt/extract/hints.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,10 +348,15 @@ def apply_hints(
else:
t.pop("file_format", None)
if references is not None:
if references:
if callable(references) or callable(t.get("references")):
t["references"] = references
else:
t.pop("references", None)
# Replace existin refs for same table
new_references = t.get("references") or []
ref_dict = {r["referenced_table"]: r for r in new_references}
for ref in references:
ref_dict[ref["referenced_table"]] = ref
t["references"] = list(ref_dict.values())

# set properties that can't be passed to make_hints
if incremental is not None:
Expand Down
19 changes: 16 additions & 3 deletions tests/extract/test_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -1420,7 +1420,7 @@ def empty_gen():
table = empty_r.compute_table_schema()
assert table["references"] == reference_hint

# Apply references again, hint is replaced
# Apply references again, list is extended
reference_hint_2 = [
dict(
referenced_table="other_table_2",
Expand All @@ -1429,9 +1429,22 @@ def empty_gen():
)
]
empty_r.apply_hints(references=reference_hint_2)
assert empty_r._hints["references"] == reference_hint_2
assert empty_r._hints["references"] == reference_hint + reference_hint_2
table = empty_r.compute_table_schema()
assert table["references"] == reference_hint_2
assert table["references"] == reference_hint + reference_hint_2

# Duplicate reference is replaced
reference_hint_3 = [
dict(
referenced_table="other_table",
columns=["a2", "b2"],
referenced_columns=["other_a2", "other_b2"],
)
]
empty_r.apply_hints(references=reference_hint_3)
assert empty_r._hints["references"] == reference_hint_3 + reference_hint_2
table = empty_r.compute_table_schema()
assert table["references"] == reference_hint_3 + reference_hint_2


def test_apply_dynamic_hints() -> None:
Expand Down

0 comments on commit 22f70c8

Please sign in to comment.