Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(ingestion/tableau): hidden asset handling #11559

Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 48 additions & 16 deletions metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,16 @@ class TableauConfig(
description="Configuration settings for ingesting Tableau groups and their capabilities as custom properties.",
)

ingest_hidden_assets: bool = Field(
True,
description="When enabled, hidden views and dashboards are ingested into Datahub. If a dashboard or view is hidden in Tableau the luid is blank. Default of this config field is True.",
)

tags_for_hidden_assets: List[str] = Field(
default=[],
description="Tags to be added to hidden dashboards and views. If a dashboard or view is hidden in Tableau the luid is blank.",
)

# pre = True because we want to take some decision before pydantic initialize the configuration to default values
@root_validator(pre=True)
def projects_backward_compatibility(cls, values: Dict) -> Dict:
Expand Down Expand Up @@ -1024,6 +1034,11 @@ def get_data_platform_instance(self) -> DataPlatformInstanceClass:
),
)

def _is_hidden_view(self, dashboard_or_view: Dict) -> bool:
# LUID is blank if the view is hidden in the workbook.
# More info here: https://help.tableau.com/current/api/metadata_api/en-us/reference/view.doc.html
return not dashboard_or_view.get(c.LUID)

def get_connection_object_page(
self,
query: str,
Expand Down Expand Up @@ -2255,10 +2270,9 @@ def emit_datasource(
# Tags
if datasource_info:
tags = self.get_tags(datasource_info)
if tags:
haeniya marked this conversation as resolved.
Show resolved Hide resolved
dataset_snapshot.aspects.append(
builder.make_global_tag_aspect_with_tag_list(tags)
)
dataset_snapshot.aspects.append(
builder.make_global_tag_aspect_with_tag_list(tags)
)

# Browse path
if browse_path and is_embedded_ds and workbook and workbook.get(c.NAME):
Expand Down Expand Up @@ -2644,7 +2658,12 @@ def emit_sheets(self) -> Iterable[MetadataWorkUnit]:
c.SHEETS_CONNECTION,
sheets_filter,
):
yield from self.emit_sheets_as_charts(sheet, sheet.get(c.WORKBOOK))
if self.config.ingest_hidden_assets or not self._is_hidden_view(sheet):
yield from self.emit_sheets_as_charts(sheet, sheet.get(c.WORKBOOK))
else:
logger.debug(
f"Skip view {sheet.get(c.ID)} because it's hidden (luid is blank)."
)
haeniya marked this conversation as resolved.
Show resolved Hide resolved

def emit_sheets_as_charts(
self, sheet: dict, workbook: Optional[Dict]
Expand Down Expand Up @@ -2736,10 +2755,13 @@ def emit_sheets_as_charts(

# Tags
tags = self.get_tags(sheet)
if tags:
chart_snapshot.aspects.append(
builder.make_global_tag_aspect_with_tag_list(tags)
)
if len(self.config.tags_for_hidden_assets) > 0 and self._is_hidden_view(sheet):
tags.extend(self.config.tags_for_hidden_assets)

chart_snapshot.aspects.append(
Copy link
Contributor Author

@haeniya haeniya Oct 8, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There seemed to be a general issue with the tag ingestion. If all tags are removed in Tableau they wouldn't get removed in Datahub because tags are only saved if there are some tags present. Now, we basically always save the GlobalTags aspect even if there are no tags. But this allows us to properly reset the tags if they were removed from Tableau. I refactored this for all the assets with tags.

This resulted in quite a big changeset in the golden test files. Let me know if you see an issue with this. Thanks.

builder.make_global_tag_aspect_with_tag_list(tags)
)

yield self.get_metadata_change_event(chart_snapshot)
if sheet_external_url is not None and self.config.ingest_embed_url is True:
yield self.new_work_unit(
Expand Down Expand Up @@ -2952,17 +2974,23 @@ def emit_dashboards(self) -> Iterable[MetadataWorkUnit]:
c.DASHBOARDS_CONNECTION,
dashboards_filter,
):
yield from self.emit_dashboard(dashboard, dashboard.get(c.WORKBOOK))

def get_tags(self, obj: dict) -> Optional[List[str]]:
if self.config.ingest_hidden_assets or not self._is_hidden_view(dashboard):
yield from self.emit_dashboard(dashboard, dashboard.get(c.WORKBOOK))
else:
logger.debug(
haeniya marked this conversation as resolved.
Show resolved Hide resolved
f"Skip dashboard {dashboard.get(c.ID)} because it's hidden (luid is blank)."
)

def get_tags(self, obj: dict) -> List[str]:
tag_list = obj.get(c.TAGS, [])
if tag_list and self.config.ingest_tags:
tag_list_str = [
t[c.NAME] for t in tag_list if t is not None and t.get(c.NAME)
]

return tag_list_str
return None
return []

def emit_dashboard(
self, dashboard: dict, workbook: Optional[Dict]
Expand Down Expand Up @@ -3014,10 +3042,14 @@ def emit_dashboard(
dashboard_snapshot.aspects.append(dashboard_info_class)

tags = self.get_tags(dashboard)
if tags:
dashboard_snapshot.aspects.append(
builder.make_global_tag_aspect_with_tag_list(tags)
)
if len(self.config.tags_for_hidden_assets) > 0 and self._is_hidden_view(
dashboard
):
tags.extend(self.config.tags_for_hidden_assets)

dashboard_snapshot.aspects.append(
builder.make_global_tag_aspect_with_tag_list(tags)
)

if self.config.extract_usage_stats:
# dashboard_snapshot doesn't support the stat aspect as list element and hence need to emit MetadataWorkUnit
Expand Down
Loading
Loading