Skip to content

Commit

Permalink
feat(bigquery): fetch bigquery dataset (datahub container) description (
Browse files Browse the repository at this point in the history
  • Loading branch information
sgomezvillamor authored Feb 19, 2025
1 parent d33ef7a commit 48f82a4
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,11 @@ def get_datasets_for_project_id(
if hasattr(d, "_properties") and isinstance(d._properties, dict)
else None
),
# TODO: Fetching each dataset's description individually hurts overall performance when the number of datasets is high (hundreds); we should instead fetch the descriptions in a batch for all datasets.
# TODO: Since we already call get_dataset for each dataset, we could also consume and publish other fields, such as created, modified, etc.
# https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.client.Client#google_cloud_bigquery_client_Client_get_dataset
# https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.dataset.Dataset
comment=self.bq_client.get_dataset(d.reference).description,
)
for d in datasets
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,7 @@ def gen_dataset_containers(
self,
dataset: str,
project_id: str,
description: Optional[str] = None,
tags: Optional[Dict[str, str]] = None,
extra_properties: Optional[Dict[str, str]] = None,
) -> Iterable[MetadataWorkUnit]:
Expand Down Expand Up @@ -336,6 +337,7 @@ def gen_dataset_containers(
domain_config=self.config.domain,
schema_container_key=schema_container_key,
database_container_key=database_container_key,
description=description,
external_url=(
BQ_EXTERNAL_DATASET_URL_TEMPLATE.format(
project=project_id, dataset=dataset
Expand Down Expand Up @@ -471,14 +473,15 @@ def _process_schema(

if self.config.include_schema_metadata:
yield from self.gen_dataset_containers(
dataset_name,
project_id,
bigquery_dataset.labels,
(
dataset=dataset_name,
project_id=project_id,
tags=bigquery_dataset.labels,
extra_properties=(
{"location": bigquery_dataset.location}
if bigquery_dataset.location
else None
),
description=bigquery_dataset.comment,
)

columns = None
Expand Down

0 comments on commit 48f82a4

Please sign in to comment.