From 63ca8a23be6a18f9b89389075ac5bf162fe44083 Mon Sep 17 00:00:00 2001 From: jp Date: Wed, 10 Jul 2024 16:51:33 +0800 Subject: [PATCH] fix alias with multiple internal underscores --- defog_utils/utils_db.py | 4 +++- tests/test_utils_db.py | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/defog_utils/utils_db.py b/defog_utils/utils_db.py index 081047d..4d8587f 100644 --- a/defog_utils/utils_db.py +++ b/defog_utils/utils_db.py @@ -577,7 +577,9 @@ def generate_aliases_dict( if "_" in table_name: # get the first letter of each subword delimited by "_" table_name = table_name.strip("_") - alias = "".join([word[0] for word in table_name.split("_")]).lower() + # use re to split on one or more underscores + words = re.split(r"_+", table_name) + alias = "".join(word[0] for word in words).lower() else: # if camelCase, get the first letter of each subword # otherwise defaults to just getting the 1st letter of the table_name diff --git a/tests/test_utils_db.py b/tests/test_utils_db.py index fbd951b..99c93fe 100644 --- a/tests/test_utils_db.py +++ b/tests/test_utils_db.py @@ -624,10 +624,10 @@ def test_generate_aliases_with_reserved_keywords(self): self.assertEqual(result, expected_result) def test_generate_aliases_with_dots_and_underscores(self): - table_names = ["db.schema.table1", "db.schema.table2", "db.schema.table3", "_uncompressed"] + table_names = ["db.schema.table1", "db.schema.table2", "db.schema.table3", "_uncompressed___long_name_"] result = generate_aliases(table_names) print(result) - expected_result = "-- db.schema.table1 AS t1\n-- db.schema.table2 AS t2\n-- db.schema.table3 AS t3\n-- _uncompressed AS u\n" + expected_result = "-- db.schema.table1 AS t1\n-- db.schema.table2 AS t2\n-- db.schema.table3 AS t3\n-- _uncompressed___long_name_ AS uln\n" self.assertEqual(result, expected_result)