Skip to content

Commit

Permalink
Merge pull request #25 from defog-ai/wendy/monthname
Browse files Browse the repository at this point in the history
Month_name_case_in + fix comma for /* comments
  • Loading branch information
wendy-aw authored Jul 15, 2024
2 parents 63ca8a2 + f96a698 commit 79d34c7
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 3 deletions.
23 changes: 22 additions & 1 deletion defog_utils/utils_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ class SqlFeatures(Features):
union: bool = False
case_condition: bool = False
has_in: bool = False
month_name_case_in: bool = False
addition: bool = False
subtraction: bool = False
ratio: bool = False
Expand Down Expand Up @@ -106,6 +107,7 @@ class SqlFeatures(Features):
# Matches an HH:MM:SS time at the start of a string: hours 00-23, minutes and
# seconds 00-59. Only the start is anchored, so trailing text is allowed.
time_pattern = r"^(0\d|1\d|2[0-3]):([0-5]\d):([0-5]\d)"
# Alternation of the date pattern (defined earlier in this module) and the
# time pattern above.
date_or_time_pattern = f"({date_pattern}|{time_pattern})"
# Matches the word "date" or "timestamp" when followed by whitespace or the
# end of the string.
date_column_pattern = r"(date|timestamp)(\s|$)"
# Matches a single-quoted English month name, either the three-letter
# abbreviation or the full name. The pattern itself is case-sensitive; callers
# that want case-insensitive matching must pass re.IGNORECASE.
# NOTE(review): 'May' appears twice in the alternation (abbreviation and full
# name are identical); the duplicate is harmless.
month_name_pattern = r"('Jan'|'Feb'|'Mar'|'Apr'|'May'|'Jun'|'Jul'|'Aug'|'Sep'|'Oct'|'Nov'|'Dec'|'January'|'February'|'March'|'April'|'May'|'June'|'July'|'August'|'September'|'October'|'November'|'December')"
variance_expressions = [
exp.VariancePop,
exp.Variance,
Expand Down Expand Up @@ -354,8 +356,12 @@ def get_sql_features(
features.union = True
elif isinstance(node, exp.Case):
features.case_condition = True
if has_month_name(str(node)):
features.month_name_case_in = True
elif isinstance(node, exp.In):
features.has_in = True
if has_month_name(str(node)):
features.month_name_case_in = True
elif isinstance(node, exp.Add):
features.addition = True
elif isinstance(node, exp.Sub):
Expand Down Expand Up @@ -514,6 +520,8 @@ def is_date_or_time_str(s: str) -> bool:
m = re.match(date_or_time_pattern, s)
return bool(m)

def has_month_name(s: str) -> bool:
    """Return True if `s` contains a single-quoted month name.

    The search uses the module-level `month_name_pattern` and ignores case,
    so `'january'` and `'JAN'` are both recognised. The match may occur
    anywhere in `s`.
    """
    found = re.search(month_name_pattern, s, flags=re.IGNORECASE)
    return found is not None

def has_date_in_name(s: str) -> bool:
    """Return True if `s` contains one of the calendar-unit words.

    The words checked are "year", "quarter", "month", "week" and "day".
    Matching is case-sensitive and finds the word anywhere in `s`, including
    inside a longer word.
    """
    unit_match = re.search(r"(year|quarter|month|week|day)", s)
    return unit_match is not None
Expand Down Expand Up @@ -605,6 +613,11 @@ def fix_comma(cols: List[str]) -> List[str]:
if not re.search(r",\s*--", col):
# use re.sub to replace (any whitespace)-- with , --
col = re.sub(r"\s*--", ", --", col)
elif "/*" in col:
# check if comma is just before comment
if not re.search(r",\s*/\*", col):
# use re.sub to replace (any whitespace)/* with , /*
col = re.sub(r"\s*/\*", ", /*", col)
# check if string ends with comma (optionally with additional spaces)
elif not re.search(r",\s*$", col):
# end with comma if not present
Expand All @@ -614,14 +627,22 @@ def fix_comma(cols: List[str]) -> List[str]:
last_col = fixed_cols[-1]
if "--" in last_col:
# check if comma is after a word/closing brace, followed by spaces before -- and remove if present

pre_comment, after_comment = last_col.split("--", 1)
# check if pre_comment ends with a comma with optional spaces
if re.search(r",\s*$", pre_comment):
pre_comment = re.sub(r",\s*$", "", pre_comment)
# remove any trailing spaces in pre_comment
pre_comment = pre_comment.rstrip()
last_col = pre_comment + " --" + after_comment
elif "/*" in last_col:
# check if comma is after a word/closing brace, followed by spaces before /* and remove if present
pre_comment, after_comment = last_col.split("/*", 1)
# check if pre_comment ends with a comma with optional spaces
if re.search(r",\s*$", pre_comment):
pre_comment = re.sub(r",\s*$", "", pre_comment)
# remove any trailing spaces in pre_comment
pre_comment = pre_comment.rstrip()
last_col = pre_comment + " /*" + after_comment
# if last_col ends with a comma with optional spaces, remove it
elif re.search(r",\s*$", last_col):
last_col = re.sub(r",\s*$", "", last_col)
Expand Down
15 changes: 13 additions & 2 deletions tests/test_utils_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,17 @@ def test_case_condition(self):
features = get_sql_features(sql, self.md_cols, self.md_tables)
self.assertTrue(features.case_condition)

def test_month_name_case_in(self):
    """Check when `month_name_case_in` is set by `get_sql_features`.

    Month-name literals inside an IN list or a CASE expression must set the
    flag; a month name used in a plain equality comparison must not.
    """
    in_list_sql = "SELECT * FROM table WHERE month_col IN ('January', 'February', 'Mar', 'Apr')"
    equality_sql = "SELECT * FROM table WHERE month_col = 'January'"
    case_when_sql = "SELECT * FROM review r WHERE DATE(CAST(r.year AS TEXT) || '-' || CASE r.month WHEN 'January' THEN '01' WHEN 'February' THEN '02' WHEN 'March' THEN '03' WHEN 'April' THEN '04' WHEN 'May' THEN '05' WHEN 'June' THEN '06' WHEN 'July' THEN '07' WHEN 'August' THEN '08' WHEN 'September' THEN '09' WHEN 'October' THEN '10' WHEN 'November' THEN '11' WHEN 'December' THEN '12' END || '-01') >= DATE('now', '-12 months');"
    # Extract features for all three queries, in order, before asserting.
    in_list_features, equality_features, case_when_features = [
        get_sql_features(query, self.md_cols, self.md_tables)
        for query in (in_list_sql, equality_sql, case_when_sql)
    ]
    self.assertTrue(in_list_features.month_name_case_in)
    self.assertFalse(equality_features.month_name_case_in)
    self.assertTrue(case_when_features.month_name_case_in)

def test_ratio(self):
sql = "SELECT column1 / column2 FROM table"
features = get_sql_features(sql, self.md_cols, self.md_tables)
Expand Down Expand Up @@ -542,7 +553,7 @@ def test_complex_sql_1(self):
features = get_sql_features(sql, self.md_cols, self.md_tables)
features_compact = features.compact()
print(features_compact)
expected_compact = "5,2,1,1,0,1,1,1,1,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0"
expected_compact = "5,2,1,1,0,1,1,1,1,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0"
self.assertEqual(features_compact, expected_compact)
positive_features = features.positive_features()
expected_positive = {
Expand Down Expand Up @@ -581,7 +592,7 @@ def test_complex_sql_2(self):
)
features_compact = features.compact()
print(features_compact)
expected_compact = "3,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0"
expected_compact = "3,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0"
self.assertEqual(features_compact, expected_compact)
positive_features = features.positive_features()
expected_positive = {
Expand Down

0 comments on commit 79d34c7

Please sign in to comment.