Skip to content

Commit

Permalink
feat(ingest/powerbi): fix subTypes and add workspace_type_filter (dat…
Browse files Browse the repository at this point in the history
…ahub-project#11523)

Co-authored-by: Harshal Sheth <[email protected]>
  • Loading branch information
2 people authored and sleeperdeep committed Dec 17, 2024
1 parent 3a4f608 commit d9523a9
Show file tree
Hide file tree
Showing 28 changed files with 2,870 additions and 716 deletions.
4 changes: 3 additions & 1 deletion metadata-ingestion/docs/sources/powerbi/powerbi_pre.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@
| `Report.webUrl` | `Chart.externalUrl` |
| `Workspace` | `Container` |
| `Report` | `Dashboard` |
| `PaginatedReport` | `Dashboard` |
| `Page` | `Chart` |

If Tile is created from report then Chart.externalUrl is set to Report.webUrl.
- If a `Tile` is created from a report, then `Chart.externalUrl` is set to `Report.webUrl`.
- `Page` is unavailable for a PowerBI `PaginatedReport`.

## Lineage

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ class DatasetSubTypes(StrEnum):
ELASTIC_DATASTREAM = "Datastream"
SALESFORCE_CUSTOM_OBJECT = "Custom Object"
SALESFORCE_STANDARD_OBJECT = "Object"
POWERBI_DATASET_TABLE = "PowerBI Dataset Table"
QLIK_DATASET = "Qlik Dataset"
BIGQUERY_TABLE_SNAPSHOT = "Bigquery Table Snapshot"
SHARDED_TABLE = "Sharded Table"
Expand Down Expand Up @@ -48,8 +47,8 @@ class BIContainerSubTypes(StrEnum):
LOOKML_PROJECT = "LookML Project"
LOOKML_MODEL = "LookML Model"
TABLEAU_WORKBOOK = "Workbook"
POWERBI_WORKSPACE = "Workspace"
POWERBI_DATASET = "PowerBI Dataset"
POWERBI_DATASET = "Semantic Model"
POWERBI_DATASET_TABLE = "Table"
QLIK_SPACE = "Qlik Space"
QLIK_APP = "Qlik App"
SIGMA_WORKSPACE = "Sigma Workspace"
Expand Down
27 changes: 24 additions & 3 deletions metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
from dataclasses import dataclass, field as dataclass_field
from enum import Enum
from typing import Dict, List, Optional, Union
from typing import Dict, List, Literal, Optional, Union

import pydantic
from pydantic import validator
Expand Down Expand Up @@ -47,6 +47,7 @@ class Constant:
WORKSPACE_ID = "workspaceId"
DASHBOARD_ID = "powerbi.linkedin.com/dashboards/{}"
DATASET_EXECUTE_QUERIES = "DATASET_EXECUTE_QUERIES_POST"
GET_WORKSPACE_APP = "GET_WORKSPACE_APP"
DATASET_ID = "datasetId"
REPORT_ID = "reportId"
SCAN_ID = "ScanId"
Expand Down Expand Up @@ -118,6 +119,15 @@ class Constant:
CHART_COUNT = "chartCount"
WORKSPACE_NAME = "workspaceName"
DATASET_WEB_URL = "datasetWebUrl"
TYPE = "type"
REPORT_TYPE = "reportType"
LAST_UPDATE = "lastUpdate"
APP_ID = "appId"
REPORTS = "reports"
ORIGINAL_REPORT_OBJECT_ID = "originalReportObjectId"
APP_SUB_TYPE = "App"
STATE = "state"
ACTIVE = "Active"


@dataclass
Expand Down Expand Up @@ -273,7 +283,8 @@ class PowerBiDashboardSourceConfig(
# PowerBi workspace identifier
workspace_id_pattern: AllowDenyPattern = pydantic.Field(
default=AllowDenyPattern.allow_all(),
description="Regex patterns to filter PowerBI workspaces in ingestion",
description="Regex patterns to filter PowerBI workspaces in ingestion."
" Note: This field works in conjunction with 'workspace_type_filter' and both must be considered when filtering workspaces.",
)

# Dataset type mapping: PowerBI supports many types of data sources. Here the user needs to define what type of PowerBI
Expand Down Expand Up @@ -340,7 +351,7 @@ class PowerBiDashboardSourceConfig(
)
modified_since: Optional[str] = pydantic.Field(
default=None,
description="Get only recently modified workspaces based on modified_since datetime '2023-02-10T00:00:00.0000000Z', excludePersonalWorkspaces and excludeInActiveWorkspaces limit to last 30 days",
description="Get only recently modified workspaces based on modified_since datetime '2023-02-10T00:00:00.0000000Z', excludeInActiveWorkspaces limit to last 30 days",
)
extract_dashboards: bool = pydantic.Field(
default=True,
Expand Down Expand Up @@ -445,6 +456,16 @@ class PowerBiDashboardSourceConfig(
description="Patch dashboard metadata",
)

workspace_type_filter: List[
Literal[
"Workspace", "PersonalGroup", "Personal", "AdminWorkspace", "AdminInsights"
]
] = pydantic.Field(
default=["Workspace"],
description="Ingest the metadata of the workspace where the workspace type corresponds to the specified workspace_type_filter."
" Note: This field works in conjunction with 'workspace_id_pattern'. Both must be matched for a workspace to be processed.",
)

@root_validator(skip_on_failure=True)
def validate_extract_column_level_lineage(cls, values: Dict) -> Dict:
flags = [
Expand Down
Loading

0 comments on commit d9523a9

Please sign in to comment.