Skip to content

Commit

Permalink
fix(ingest/lookml): missing lineage for looker template -- if prod (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
sid-acryl authored Oct 1, 2024
1 parent a078768 commit 67d7116
Show file tree
Hide file tree
Showing 7 changed files with 73 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,14 @@
from dataclasses import dataclass
from typing import Dict, List, Optional, Set

from datahub.ingestion.source.looker.lkml_patched import load_lkml
from datahub.ingestion.source.looker.looker_connection import LookerConnectionDefinition
from datahub.ingestion.source.looker.looker_template_language import (
load_and_preprocess_file,
)
from datahub.ingestion.source.looker.lookml_config import (
_BASE_PROJECT_NAME,
_EXPLORE_FILE_EXTENSION,
LookMLSourceConfig,
LookMLSourceReport,
)

Expand Down Expand Up @@ -43,6 +46,7 @@ def from_looker_dict(
root_project_name: Optional[str],
base_projects_folders: Dict[str, pathlib.Path],
path: str,
source_config: LookMLSourceConfig,
reporter: LookMLSourceReport,
) -> "LookerModel":
logger.debug(f"Loading model from {path}")
Expand All @@ -54,6 +58,7 @@ def from_looker_dict(
root_project_name,
base_projects_folders,
path,
source_config,
reporter,
seen_so_far=set(),
traversal_path=pathlib.Path(path).stem,
Expand All @@ -68,7 +73,10 @@ def from_looker_dict(
]
for included_file in explore_files:
try:
parsed = load_lkml(included_file)
parsed = load_and_preprocess_file(
path=included_file,
source_config=source_config,
)
included_explores = parsed.get("explores", [])
explores.extend(included_explores)
except Exception as e:
Expand All @@ -94,6 +102,7 @@ def resolve_includes(
root_project_name: Optional[str],
base_projects_folder: Dict[str, pathlib.Path],
path: str,
source_config: LookMLSourceConfig,
reporter: LookMLSourceReport,
seen_so_far: Set[str],
traversal_path: str = "", # a cosmetic parameter to aid debugging
Expand Down Expand Up @@ -206,7 +215,10 @@ def resolve_includes(
f"Will be loading {included_file}, traversed here via {traversal_path}"
)
try:
parsed = load_lkml(included_file)
parsed = load_and_preprocess_file(
path=included_file,
source_config=source_config,
)
seen_so_far.add(included_file)
if "includes" in parsed: # we have more includes to resolve!
resolved.extend(
Expand All @@ -216,6 +228,7 @@ def resolve_includes(
root_project_name,
base_projects_folder,
included_file,
source_config,
reporter,
seen_so_far,
traversal_path=traversal_path
Expand Down Expand Up @@ -259,6 +272,7 @@ def from_looker_dict(
root_project_name: Optional[str],
base_projects_folder: Dict[str, pathlib.Path],
raw_file_content: str,
source_config: LookMLSourceConfig,
reporter: LookMLSourceReport,
) -> "LookerViewFile":
logger.debug(f"Loading view file at {absolute_file_path}")
Expand All @@ -272,6 +286,7 @@ def from_looker_dict(
root_project_name,
base_projects_folder,
absolute_file_path,
source_config,
reporter,
seen_so_far=seen_so_far,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@
from dataclasses import replace
from typing import Dict, Optional

from datahub.ingestion.source.looker.lkml_patched import load_lkml
from datahub.ingestion.source.looker.looker_config import LookerConnectionDefinition
from datahub.ingestion.source.looker.looker_dataclasses import LookerViewFile
from datahub.ingestion.source.looker.looker_template_language import (
process_lookml_template_language,
load_and_preprocess_file,
)
from datahub.ingestion.source.looker.lookml_config import (
_EXPLORE_FILE_EXTENSION,
Expand Down Expand Up @@ -72,10 +71,8 @@ def _load_viewfile(
try:
logger.debug(f"Loading viewfile {path}")

parsed = load_lkml(path)

process_lookml_template_language(
view_lkml_file_dict=parsed,
parsed = load_and_preprocess_file(
path=path,
source_config=self.source_config,
)

Expand All @@ -86,6 +83,7 @@ def _load_viewfile(
root_project_name=self._root_project_name,
base_projects_folder=self._base_projects_folder,
raw_file_content=raw_file_content,
source_config=self.source_config,
reporter=reporter,
)
logger.debug(f"adding viewfile for path {path} to the cache")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import logging
import pathlib
import re
from abc import ABC, abstractmethod
from typing import Any, ClassVar, Dict, List, Optional, Set
from typing import Any, ClassVar, Dict, List, Optional, Set, Union

from deepmerge import always_merger
from liquid import Undefined
from liquid.exceptions import LiquidSyntaxError

from datahub.ingestion.source.looker.lkml_patched import load_lkml
from datahub.ingestion.source.looker.looker_constant import (
DATAHUB_TRANSFORMED_SQL,
DATAHUB_TRANSFORMED_SQL_TABLE_NAME,
Expand Down Expand Up @@ -390,6 +392,7 @@ def process_lookml_template_language(
source_config: LookMLSourceConfig,
view_lkml_file_dict: dict,
) -> None:

if "views" not in view_lkml_file_dict:
return

Expand All @@ -416,3 +419,18 @@ def process_lookml_template_language(
)

view_lkml_file_dict["views"] = transformed_views


def load_and_preprocess_file(
    path: Union[str, pathlib.Path],
    source_config: LookMLSourceConfig,
) -> dict:
    """Load a LookML file and run template-language processing over it.

    Args:
        path: Location of the LookML file to parse.
        source_config: LookML source configuration forwarded to
            ``process_lookml_template_language``.

    Returns:
        The dictionary produced by ``load_lkml``, after
        ``process_lookml_template_language`` has updated it in place.
    """
    lkml_dict = load_lkml(path)

    # The processor returns None and rewrites the dictionary it is given,
    # so the same object is handed back to the caller.
    process_lookml_template_language(
        source_config=source_config,
        view_lkml_file_dict=lkml_dict,
    )

    return lkml_dict
Original file line number Diff line number Diff line change
Expand Up @@ -365,8 +365,9 @@ def sql_table_name(self) -> str:
return sql_table_name.lower()

def datahub_transformed_sql_table_name(self) -> str:
table_name: Optional[str] = self.raw_view.get(
"datahub_transformed_sql_table_name"
# This field might be present in a parent view of the current view
table_name: Optional[str] = self.get_including_extends(
field="datahub_transformed_sql_table_name"
)

if not table_name:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
DatasetSubTypes,
)
from datahub.ingestion.source.git.git_import import GitClone
from datahub.ingestion.source.looker.lkml_patched import load_lkml
from datahub.ingestion.source.looker.looker_common import (
CORPUSER_DATAHUB,
LookerExplore,
Expand All @@ -45,6 +44,9 @@
get_connection_def_based_on_connection_string,
)
from datahub.ingestion.source.looker.looker_lib_wrapper import LookerAPI
from datahub.ingestion.source.looker.looker_template_language import (
load_and_preprocess_file,
)
from datahub.ingestion.source.looker.looker_view_id_cache import (
LookerModel,
LookerViewFileLoader,
Expand Down Expand Up @@ -311,13 +313,19 @@ def __init__(self, config: LookMLSourceConfig, ctx: PipelineContext):

def _load_model(self, path: str) -> LookerModel:
    """Parse the model file at ``path`` and build a ``LookerModel`` from it.

    Args:
        path: Location of the model file to load.

    Returns:
        The ``LookerModel`` created by ``LookerModel.from_looker_dict`` from
        the parsed and pre-processed file content.
    """
    logger.debug(f"Loading model from file {path}")

    # load_and_preprocess_file parses the file itself, so a separate
    # load_lkml(path) call beforehand would only parse it a second time and
    # throw the result away.
    parsed = load_and_preprocess_file(
        path=path,
        source_config=self.source_config,
    )

    return LookerModel.from_looker_dict(
        parsed,
        _BASE_PROJECT_NAME,
        self.source_config.project_name,
        self.base_projects_folder,
        path,
        self.source_config,
        self.reporter,
    )
Expand Down Expand Up @@ -495,7 +503,10 @@ def get_project_name(self, model_name: str) -> str:
def get_manifest_if_present(self, folder: pathlib.Path) -> Optional[LookerManifest]:
manifest_file = folder / "manifest.lkml"
if manifest_file.exists():
manifest_dict = load_lkml(manifest_file)

manifest_dict = load_and_preprocess_file(
path=manifest_file, source_config=self.source_config
)

manifest = LookerManifest(
project_name=manifest_dict.get("project_name"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ def _generate_fully_qualified_name(
sql_table_name: str,
connection_def: LookerConnectionDefinition,
reporter: LookMLSourceReport,
view_name: str,
) -> str:
"""Returns a fully qualified dataset name, resolved through a connection definition.
Input sql_table_name can be in three forms: table, db.table, db.schema.table"""
Expand Down Expand Up @@ -192,7 +193,7 @@ def _generate_fully_qualified_name(
reporter.report_warning(
title="Malformed Table Name",
message="Table name has more than 3 parts.",
context=f"Table Name: {sql_table_name}",
context=f"view-name: {view_name}, table-name: {sql_table_name}",
)
return sql_table_name.lower()

Expand Down Expand Up @@ -280,10 +281,13 @@ def __get_upstream_dataset_urn(self) -> List[Urn]:
return []

if sql_parsing_result.debug_info.table_error is not None:
logger.debug(
f"view-name={self.view_context.name()}, sql_query={self.get_sql_query()}"
)
self.reporter.report_warning(
title="Table Level Lineage Missing",
message="Error in parsing derived sql",
context=f"View-name: {self.view_context.name()}",
context=f"view-name: {self.view_context.name()}, platform: {self.view_context.view_connection.platform}",
exc=sql_parsing_result.debug_info.table_error,
)
return []
Expand Down Expand Up @@ -530,6 +534,7 @@ def __get_upstream_dataset_urn(self) -> Urn:
sql_table_name=self.view_context.datahub_transformed_sql_table_name(),
connection_def=self.view_context.view_connection,
reporter=self.view_context.reporter,
view_name=self.view_context.name(),
)

self.upstream_dataset_urn = make_dataset_urn_with_platform_instance(
Expand Down Expand Up @@ -586,6 +591,7 @@ def __get_upstream_dataset_urn(self) -> List[Urn]:
self.view_context.datahub_transformed_sql_table_name(),
self.view_context.view_connection,
self.view_context.reporter,
self.view_context.name(),
),
base_folder_path=self.view_context.base_folder_path,
looker_view_id_cache=self.looker_view_id_cache,
Expand Down
9 changes: 7 additions & 2 deletions metadata-ingestion/tests/integration/lookml/test_lookml.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import pathlib
from typing import Any, List
from unittest import mock
from unittest.mock import MagicMock

import pydantic
import pytest
Expand All @@ -14,13 +15,13 @@
from datahub.ingestion.source.file import read_metadata_file
from datahub.ingestion.source.looker.looker_template_language import (
SpecialVariable,
load_and_preprocess_file,
resolve_liquid_variable,
)
from datahub.ingestion.source.looker.lookml_source import (
LookerModel,
LookerRefinementResolver,
LookMLSourceConfig,
load_lkml,
)
from datahub.metadata.schema_classes import (
DatasetSnapshotClass,
Expand Down Expand Up @@ -870,7 +871,11 @@ def test_manifest_parser(pytestconfig: pytest.Config) -> None:
test_resources_dir = pytestconfig.rootpath / "tests/integration/lookml"
manifest_file = test_resources_dir / "lkml_manifest_samples/complex-manifest.lkml"

manifest = load_lkml(manifest_file)
manifest = load_and_preprocess_file(
path=manifest_file,
source_config=MagicMock(),
)

assert manifest


Expand Down

0 comments on commit 67d7116

Please sign in to comment.