From 41f6c498e2c4cbad125f6e12208a5c9ea2782581 Mon Sep 17 00:00:00 2001 From: treff7es Date: Mon, 7 Oct 2024 14:30:01 +0200 Subject: [PATCH] Move base32 encode --- .../source/bigquery_v2/bigquery_schema_gen.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py index c1b90002098914..49a2b5bbf76b9e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py @@ -196,6 +196,18 @@ def store_table_refs(self): or self.config.use_queries_v2 ) + def modified_base32decode(self, text_to_decode: str) -> str: + # When we sync from DataHub to BigQuery, we encode the tags as modified base32 strings. + # BiqQuery labels only support lowercase letters, international characters, numbers, or underscores. + # So we need to modify the base32 encoding to replace the padding character `=` with `_` and convert to lowercase. + if not text_to_decode.startswith("%s" % ENCODED_TAG_PREFIX): + return text_to_decode + text_to_decode = ( + text_to_decode.replace(ENCODED_TAG_PREFIX, "").upper().replace("_", "=") + ) + text = b32decode(text_to_decode.encode("utf-8")).decode("utf-8") + return text + def get_project_workunits( self, project: BigqueryProject ) -> Iterable[MetadataWorkUnit]: @@ -791,18 +803,6 @@ def gen_snapshot_dataset_workunits( custom_properties=custom_properties, ) - def modified_base32decode(self, text_to_decode: str) -> str: - # When we sync from DataHub to BigQuery, we encode the tags as modified base32 strings. - # BiqQuery labels only support lowercase letters, international characters, numbers, or underscores. - # So we need to modify the base32 encoding to replace the padding character `=` with `_` and convert to lowercase. - if not text_to_decode.startswith("%s" % ENCODED_TAG_PREFIX): - return text_to_decode - text_to_decode = ( - text_to_decode.replace(ENCODED_TAG_PREFIX, "").upper().replace("_", "=") - ) - text = b32decode(text_to_decode.encode("utf-8")).decode("utf-8") - return text - def gen_dataset_workunits( self, table: Union[BigqueryTable, BigqueryView, BigqueryTableSnapshot],