From 80c89c80471a3b500512ad27e7897109ca0296ac Mon Sep 17 00:00:00 2001
From: Ally Franken <ally.franken@capitalone.com>
Date: Wed, 15 Jan 2025 13:55:33 -0500
Subject: [PATCH] remove unnecessary convert_exponential_strings helper

---
 datacompy/spark/sql.py | 57 +++++++++++++++---------------------------
 1 file changed, 20 insertions(+), 37 deletions(-)

diff --git a/datacompy/spark/sql.py b/datacompy/spark/sql.py
index 4f3a507..6db0674 100644
--- a/datacompy/spark/sql.py
+++ b/datacompy/spark/sql.py
@@ -1270,30 +1270,6 @@ def handle_numeric_strings(df, field_list):
     return df
 
 
-def format_numeric_fields(df):
-    fixed_cols = []
-    numeric_types = [
-        "tinyint",
-        "smallint",
-        "int",
-        "bigint",
-        "float",
-        "double",
-        "decimal"]
-
-    for c in df.dtypes:
-        # do not change non-numeric fields
-        if c[1] not in numeric_types:
-            fixed_cols.append(col(c[0]))
-        # round & truncate numeric fields
-        else:
-            new_val = format_number(col(c[0]), 5).alias(c[0])
-            fixed_cols.append(new_val)
-
-    formatted_df = df.select(*fixed_cols)
-    return formatted_df
-
-
 def sort_rows(prod_df, release_df):
     prod_cols = prod_df.columns
     release_cols = release_df.columns
@@ -1324,18 +1300,25 @@ def sort_columns(prod_df, release_df):
     return prod_df, release_df
 
 
-def convert_exponential_strings(base_df, compare_df):
-    # convert scientific number (1.23E4) to a decimal value
-    def sci_no_to_decimal(value):
-        return when(col(value).rlike(r"^[-+]?[0-9]*\.?[0-9]+[eE][0-9]+"),
-                      col(value).cast(T.DecimalType(30, 10))).otherwise(col(value))
-
-    df_return_list = []
+def format_numeric_fields(df):
+    fixed_cols = []
+    numeric_types = [
+        "tinyint",
+        "smallint",
+        "int",
+        "bigint",
+        "float",
+        "double",
+        "decimal"]
 
-    for df in [base_df, compare_df]:
-        for column in df.columns:
-            if column in df.columns and df.schema[column].dataType == T.StringType():
-                df = df.withColumn(column, sci_no_to_decimal(column))
-        df_return_list.append(df)
+    for c in df.dtypes:
+        # do not change non-numeric fields
+        if c[1] not in numeric_types:
+            fixed_cols.append(col(c[0]))
+        # round & truncate numeric fields
+        else:
+            new_val = format_number(col(c[0]), 5).alias(c[0])
+            fixed_cols.append(new_val)
 
-    return df_return_list[0], df_return_list[1]
\ No newline at end of file
+    formatted_df = df.select(*fixed_cols)
+    return formatted_df
\ No newline at end of file