Skip to content

Commit

Permalink
generate container for parent first
Browse files Browse the repository at this point in the history
  • Loading branch information
sid-acryl committed Sep 3, 2024
1 parent 439a375 commit 29f9406
Showing 1 changed file with 58 additions and 23 deletions.
81 changes: 58 additions & 23 deletions metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py
Original file line number Diff line number Diff line change
Expand Up @@ -3067,36 +3067,71 @@ def _get_ownership(self, user: str) -> Optional[OwnershipClass]:
return None

def emit_project_containers(self) -> Iterable[MetadataWorkUnit]:
for _id, project in self.tableau_project_registry.items():
parent_container_key: Optional[ContainerKey] = None
logger.debug(
f"project {project.name} and it's parent {project.parent_name} and parent id {project.parent_id}"
)
if project.parent_id:
parent_container_key = self.gen_project_key(project.parent_id)
elif self.config.add_site_container and self.site and self.site.id:
parent_container_key = self.gen_site_key(self.site.id)
history: Set[str] = set()

def emit_project_in_topological_order(
project_: TableauProject,
) -> Iterable[MetadataWorkUnit]:
if project_.parent_id is None:
project_key = self.gen_project_key(project_.id)
if project_key.guid() not in history:
history.add(project_key.guid())

parent_container_key: Optional[ContainerKey] = None
# set site as parent as per config
if self.config.add_site_container and self.site and self.site.id:
parent_container_key = self.gen_site_key(self.site.id)

yield from gen_containers(
container_key=self.gen_project_key(project_.id),
name=project_.name,
description=project_.description
if project_.description != ""
else None,
sub_types=[c.PROJECT],
parent_container_key=parent_container_key,
)

return

parent_project: Optional[
TableauProject
] = self.tableau_project_registry.get(project_.parent_id)
if (
project.parent_id is not None
and project.parent_id not in self.tableau_project_registry
):
# Parent project got skipped because of project_pattern.
# Let's ingest its container name property to show parent container name on DataHub Portal, otherwise
# DataHub Portal will show parent container URN
parent_project is None
): # this project is filtered because of project_pattern, we need to generate MCP
# for this, otherwise DataHub Portal will show parent container URN
assert (
project_.parent_name
), f"project {project_.name} parent project name is None"
parent_project = TableauProject(
id=project_.parent_id,
name=project_.parent_name,
description="",
parent_id=None,
parent_name=None,
path=[],
)

yield from emit_project_in_topological_order(parent_project)

container_key = self.gen_project_key(project_.id)

if container_key.guid() not in history:
history.add(container_key.guid())
yield from gen_containers(
container_key=self.gen_project_key(project.parent_id),
name=cast(str, project.parent_name),
container_key=container_key,
name=project_.name,
description=project_.description,
sub_types=[c.PROJECT],
parent_container_key=self.gen_project_key(project_.parent_id),
)

yield from gen_containers(
container_key=self.gen_project_key(_id),
name=project.name,
description=project.description,
sub_types=[c.PROJECT],
parent_container_key=parent_container_key,
for id_, project in self.tableau_project_registry.items():
logger.debug(
f"project {project.name} and it's parent {project.parent_name} and parent id {project.parent_id}"
)
yield from emit_project_in_topological_order(project)

def emit_site_container(self):
if not self.site or not self.site.id:
Expand Down

0 comments on commit 29f9406

Please sign in to comment.