Tweak Res Edit town close QC report to store percentage columns as numbers (#606)

* Tweak Res Edit town close QC report to store percentage columns as numbers
* Fix typo in dbt/README.md
* Fix typo in dbt/scripts/export_models.py

Co-authored-by: Dan Snow <[email protected]>
---------
Co-authored-by: Dan Snow <[email protected]>
ccao-data · Sep 19, 2024 · ac90562 · ac90562
1 parent d19dedd
commit ac90562
Show file tree

Hide file tree

Showing 5 changed files with 77 additions and 61 deletions.
diff --git a/dbt/README.md b/dbt/README.md
@@ -869,6 +869,10 @@ model during export:
                to make the column config object more readable.
              * `horizontal_align` (optional): The horizontal alignment to set on the column, one of
                `left` or `right`.
+             * `number_format` (optional): The number format to apply to the
+               column. See the [openpyxl source
+               code](https://openpyxl.readthedocs.io/en/stable/_modules/openpyxl/styles/numbers.html)
+               for a list of options.
 
 #### Example: Adding a new QC report
 
@@ -888,8 +892,9 @@ models:
       export_format:
         columns:
           - index: B
-            name: Class
-            horizontal_align: left
+            name: Percent Change
+            horizontal_align: right
+            number_format: "0.00%"
 ```
 
 In the case of this model, the `export_models` script:
@@ -898,7 +903,8 @@ In the case of this model, the `export_models` script:
   is set
 * Will use the template `dbt/export/templates/qc_report_new.xlsx` to populate data
 * Will export the output workbook to `dbt/export/output/QC Report (New).xlsx`
-* Will left-align column B, a column with the name `Class`
+* Will right-align column B, a column with the name `Percent Change`
+* Will format column B as a percentage with two decimal places
 
 ## 🐛 Debugging tips
 

diff --git a/dbt/models/qc/qc.vw_report_town_close_res_edit.sql b/dbt/models/qc/qc.vw_report_town_close_res_edit.sql
@@ -100,42 +100,28 @@ SELECT
     aprval_prev.aprland AS aprland_prev,
     aprval_prev.aprbldg AS aprbldg_prev,
     aprval_prev.aprtot AS aprtot_prev,
-    CONCAT(
-        CAST(
-            ROUND(
-                (
-                    (aprval.aprtot - aprval_prev.aprtot)
-                    / CAST(aprval_prev.aprtot AS DOUBLE)
-                )
-                * 100,
-                2
-            )
-            AS VARCHAR
+    ROUND(
+        (
+            (aprval.aprtot - aprval_prev.aprtot)
+            / CAST(aprval_prev.aprtot AS DOUBLE)
         ),
-        '%'
+        2
     ) AS aprtot_percent_change,
     aprval.dwelval,
     aprval.dwelval + aprval.aprland AS dweltot,
     aprval_prev.dwelval AS dwelval_prev,
     aprval_prev.dwelval + aprval_prev.aprland AS dweltot_prev,
-    CONCAT(
-        CAST(
-            ROUND(
-                (
-                    (
-                        (aprval.dwelval + aprval.aprland)
-                        - (aprval_prev.dwelval + aprval_prev.aprland)
-                    )
-                    / CAST(
-                        (aprval_prev.dwelval + aprval_prev.aprland) AS DOUBLE
-                    )
-                )
-                * 100,
-                2
+    ROUND(
+        (
+            (
+                (aprval.dwelval + aprval.aprland)
+                - (aprval_prev.dwelval + aprval_prev.aprland)
+            )
+            / CAST(
+                (aprval_prev.dwelval + aprval_prev.aprland) AS DOUBLE
             )
-            AS VARCHAR
         ),
-        '%'
+        2
     ) AS dweltot_percent_change,
     sale.saledt_fmt,
     sale.price,

diff --git a/dbt/models/qc/schema.yml b/dbt/models/qc/schema.yml
@@ -923,12 +923,14 @@ models:
           - name: Dwelling % Change
             index: Z
             horizontal_align: right
+            number_format: "0.00%"
           - name: Sale Date
             index: AA
             horizontal_align: right
           - name: Total % Change
             index: U
             horizontal_align: right
+            number_format: "0.00%"
 
   - name: qc.vw_report_town_close_res_multicodes
     description: '{{ doc("view_vw_report_town_close_res_multicodes") }}'

diff --git a/dbt/scripts/export_models.py b/dbt/scripts/export_models.py
@@ -19,6 +19,9 @@
       since all templates are assumed to be .xlsx files. Templates should be stored in the export/templates/ directory and should include header
       rows. If unset, will search for a template with the same name as the model; if no template is found, defaults to a simple layout with
       filterable columns and striped rows.
+
+    * config.meta.export_format (optional): Formatting to apply to the output workbook. Useful for specific types of formatting, like alignment
+      and number formats, that Excel can only apply after populating a template with data
 """  # noqa: E501
 CLI_EXAMPLE = """Example usage to output the 2024 non-tri town close QC report for Leyden, which is a non-tri town in 2024:
 

diff --git a/dbt/scripts/utils/export.py b/dbt/scripts/utils/export.py
@@ -177,54 +177,73 @@ def export_models(
                 )
                 sheet.add_table(table)
 
+                # Parse column format settings by col index. Since column
+                # formatting needs to be applied at the cell level, we'll
+                # first parse all format settings for each column, and then
+                # we'll iterate every cell once to apply all formatting at the
+                # same time
+                column_format_by_index = {}
+
                 # If a parid column exists, format it explicitly as a
                 # 14-digit number to avoid Excel converting it to scientific
                 # notation or stripping out leading zeros
                 if "parid" in model_df or "pin" in model_df:
                     parid_field = "parid" if "parid" in model_df else "pin"
                     parid_index = model_df.columns.get_loc(parid_field)
-                    # Skip header row when applying formatting. We need to
-                    # catch the special case where there is only one row, or
-                    # else we will iterate the _cells_ in that row instead of
-                    # the row when slicing it from 2 : max_row
-                    non_header_rows = (
-                        [sheet[2]]
-                        if sheet.max_row == 2
-                        else sheet[2 : sheet.max_row]
-                    )
-                    for row in non_header_rows:
-                        row[parid_index].number_format = "00000000000000"
+                    column_format_by_index[parid_index] = {
+                        "number_format": "00000000000000",
                         # Left align since PINs do not actually need to be
                         # compared by order of magnitude the way that numbers
                         # do
-                        row[parid_index].alignment = Alignment(
-                            horizontal="left"
-                        )
+                        "alignment": Alignment(horizontal="left"),
+                    }
 
-                # Apply any column formatting that was configured
+                # Parse any formatting that is configured at the column level.
+                # Note that if formatting is configured for a column that we
+                # parsed as a parid column above, these settings will override
+                # the default parid settings from the block above
                 format_config = model["config"]["meta"].get(
                     "export_format", {}
                 )
                 if column_configs := format_config.get("columns"):
                     for column_config in column_configs:
-                        # Set horizontal alignment if config is present
+                        # The column index is required in order to set any
+                        # column-level configs
+                        col_letter = column_config.get("index")
+                        if col_letter is None:
+                            raise ValueError(
+                                "'index' attribute is required in "
+                                "export_format.columns config for "
+                                f"model {model_name}"
+                            )
+                        idx = column_index_from_string(col_letter) - 1
+                        # Initialize the config dict for this column if
+                        # none exists yet
+                        column_format_by_index[idx] = {}
+                        # Parse configs if they are present
+                        if number_format := column_config.get("number_format"):
+                            column_format_by_index[idx]["number_format"] = (
+                                number_format
+                            )
                         if horiz_align_dir := column_config.get(
                             "horizontal_align"
                         ):
-                            horizontal_alignment = Alignment(
-                                horizontal=horiz_align_dir
+                            column_format_by_index[idx]["alignment"] = (
+                                Alignment(horizontal=horiz_align_dir)
                             )
-                            col_letter = column_config.get("index")
-                            if col_letter is None:
-                                raise ValueError(
-                                    "'index' attribute is required when "
-                                    "'horizontal_align' is set on "
-                                    "export_format.columns config for "
-                                    f"model {model_name}"
-                                )
-                            idx = column_index_from_string(col_letter) - 1
-                            # Skip header row
-                            for row in sheet[2 : sheet.max_row]:
-                                row[idx].alignment = horizontal_alignment
+
+                # Skip header row when applying formatting. We need to
+                # catch the special case where there is only one row, or
+                # else we will iterate the _cells_ in that row instead of
+                # the row when slicing it from 2 : max_row
+                non_header_rows = (
+                    [sheet[2]]
+                    if sheet.max_row == 2
+                    else sheet[2 : sheet.max_row]
+                )
+                for row in non_header_rows:
+                    for idx, formats in column_format_by_index.items():
+                        for attr, val in formats.items():
+                            setattr(row[idx], attr, val)
 
         print(f"Exported model {model_name} to {output_path}")