Skip to content

Commit

Permalink
Move base32 encode
Browse files Browse the repository at this point in the history
  • Loading branch information
treff7es committed Oct 7, 2024
1 parent 34803d9 commit 41f6c49
Showing 1 changed file with 12 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,18 @@ def store_table_refs(self):
or self.config.use_queries_v2
)

def modified_base32decode(self, text_to_decode: str) -> str:
    """Decode a tag that was stored as a modified base32 BigQuery label.

    When tags are synced from DataHub to BigQuery they are encoded as
    modified base32 strings. BigQuery labels only support lowercase
    letters, international characters, numbers, or underscores, so the
    encoding replaces the base32 padding character `=` with `_` and
    lowercases the result. This method reverses that transformation.

    Args:
        text_to_decode: The label text. Values that do not start with
            ENCODED_TAG_PREFIX are treated as plain (unencoded) text.

    Returns:
        The decoded text, or `text_to_decode` unchanged when it does not
        carry the encoded-tag prefix.

    Raises:
        binascii.Error: If the payload after the prefix is not valid base32.
        UnicodeDecodeError: If the decoded bytes are not valid UTF-8.
    """
    if not text_to_decode.startswith(ENCODED_TAG_PREFIX):
        return text_to_decode

    # Strip only the leading prefix. `str.replace` would also remove any
    # later occurrence of the same text inside the payload and corrupt it.
    encoded = text_to_decode[len(ENCODED_TAG_PREFIX) :]

    # Undo the label-safe substitutions: restore the uppercase alphabet and
    # the `=` padding expected by standard base32.
    padded = encoded.upper().replace("_", "=")
    return b32decode(padded.encode("utf-8")).decode("utf-8")

def get_project_workunits(
self, project: BigqueryProject
) -> Iterable[MetadataWorkUnit]:
Expand Down Expand Up @@ -791,18 +803,6 @@ def gen_snapshot_dataset_workunits(
custom_properties=custom_properties,
)

def modified_base32decode(self, text_to_decode: str) -> str:
    """Decode a tag that was stored as a modified base32 BigQuery label.

    When tags are synced from DataHub to BigQuery they are encoded as
    modified base32 strings. BigQuery labels only support lowercase
    letters, international characters, numbers, or underscores, so the
    encoding replaces the base32 padding character `=` with `_` and
    lowercases the result. This method reverses that transformation.

    Args:
        text_to_decode: The label text. Values that do not start with
            ENCODED_TAG_PREFIX are treated as plain (unencoded) text.

    Returns:
        The decoded text, or `text_to_decode` unchanged when it does not
        carry the encoded-tag prefix.

    Raises:
        binascii.Error: If the payload after the prefix is not valid base32.
        UnicodeDecodeError: If the decoded bytes are not valid UTF-8.
    """
    if not text_to_decode.startswith(ENCODED_TAG_PREFIX):
        return text_to_decode

    # Strip only the leading prefix. `str.replace` would also remove any
    # later occurrence of the same text inside the payload and corrupt it.
    encoded = text_to_decode[len(ENCODED_TAG_PREFIX) :]

    # Undo the label-safe substitutions: restore the uppercase alphabet and
    # the `=` padding expected by standard base32.
    padded = encoded.upper().replace("_", "=")
    return b32decode(padded.encode("utf-8")).decode("utf-8")

def gen_dataset_workunits(
self,
table: Union[BigqueryTable, BigqueryView, BigqueryTableSnapshot],
Expand Down

0 comments on commit 41f6c49

Please sign in to comment.