From b65e698835643f02b65ff8236cd5bcf644206a7e Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <nikhil.woodruff@outlook.com>
Date: Mon, 2 Dec 2024 12:12:34 +0000
Subject: [PATCH] Add budget charts and data

---
 .../outputs/macro/comparison/budget.py        |  44 ------
 .../macro/comparison/budget/general.py        | 109 +++++++++++++++
 .../programs.py}                              |   2 +-
 .../outputs/macro/comparison/budget/window.py |  68 ++++++++++
 .../parliamentary_constituencies.py           |  16 ++-
 .../parliamentary_constituencies/data.py      |  27 ----
 .../parliamentary_constituencies/heatmap.py   | 127 ------------------
 .../outputs/macro/single/gov/budget_window.py |  47 +++++++
 policyengine/simulation.py                    |   9 ++
 policyengine/utils/charts.py                  |  34 +++--
 10 files changed, 259 insertions(+), 224 deletions(-)
 delete mode 100644 policyengine/outputs/macro/comparison/budget.py
 create mode 100644 policyengine/outputs/macro/comparison/budget/general.py
 rename policyengine/outputs/macro/comparison/{detailed_budget.py => budget/programs.py} (96%)
 create mode 100644 policyengine/outputs/macro/comparison/budget/window.py
 delete mode 100644 policyengine/outputs/macro/comparison/local_areas/parliamentary_constituencies/data.py
 delete mode 100644 policyengine/outputs/macro/comparison/local_areas/parliamentary_constituencies/heatmap.py
 create mode 100644 policyengine/outputs/macro/single/gov/budget_window.py

diff --git a/policyengine/outputs/macro/comparison/budget.py b/policyengine/outputs/macro/comparison/budget.py
deleted file mode 100644
index 342a6f0..0000000
--- a/policyengine/outputs/macro/comparison/budget.py
+++ /dev/null
@@ -1,44 +0,0 @@
-from policyengine import Simulation
-
-
-def budget(simulation: Simulation):
-    """Calculate the budgetary impact of the given simulation.
-
-    Args:
-        simulation (Simulation): The simulation for which the revenue impact is to be calculated.
-
-    Returns:
-        dict: A dictionary containing the budgetary impact details with the following keys:
-            - budgetary_impact (float): The overall budgetary impact.
-            - tax_revenue_impact (float): The impact on tax revenue.
-            - state_tax_revenue_impact (float): The impact on state tax revenue.
-            - benefit_spending_impact (float): The impact on benefit spending.
-            - households (int): The number of households.
-            - baseline_net_income (float): The total net income in the baseline scenario.
-    """
-    baseline = simulation.calculate("macro/baseline")
-    reform = simulation.calculate("macro/reform")
-
-    tax_revenue_impact = (
-        reform["gov"]["balance"]["total_tax"]
-        - baseline["gov"]["balance"]["total_tax"]
-    )
-    state_tax_revenue_impact = (
-        reform["gov"]["balance"]["total_state_tax"]
-        - baseline["gov"]["balance"]["total_state_tax"]
-    )
-    benefit_spending_impact = (
-        reform["gov"]["balance"]["total_spending"]
-        - baseline["gov"]["balance"]["total_spending"]
-    )
-    budgetary_impact = tax_revenue_impact - benefit_spending_impact
-    households = sum(baseline["household"]["demographics"]["household_weight"])
-    baseline_net_income = baseline["household"]["finance"]["total_net_income"]
-    return dict(
-        budgetary_impact=budgetary_impact,
-        tax_revenue_impact=tax_revenue_impact,
-        state_tax_revenue_impact=state_tax_revenue_impact,
-        benefit_spending_impact=benefit_spending_impact,
-        households=households,
-        baseline_net_income=baseline_net_income,
-    )
diff --git a/policyengine/outputs/macro/comparison/budget/general.py b/policyengine/outputs/macro/comparison/budget/general.py
new file mode 100644
index 0000000..a6e7835
--- /dev/null
+++ b/policyengine/outputs/macro/comparison/budget/general.py
@@ -0,0 +1,109 @@
+from policyengine import Simulation
+import plotly.graph_objects as go
+import plotly.express as px
+import pandas as pd
+from policyengine.utils.charts import *
+
+
+def general(simulation: Simulation, chart: bool = False):
+    """Summarise how the reform changes the government budget.
+
+    Args:
+        simulation (Simulation): The simulation whose "macro/baseline" and
+            "macro/reform" outputs are compared.
+        chart (bool): If True, return the figure built by `budget_chart`
+            from the result instead of the result itself.
+
+    Returns:
+        dict: Unless `chart` is True, a dictionary with the following keys:
+            - budgetary_impact: Tax revenue impact minus spending impact.
+            - tax_revenue_impact: Reform minus baseline total tax.
+            - state_tax_revenue_impact: Reform minus baseline state tax.
+            - benefit_spending_impact: Reform minus baseline spending.
+            - households: Sum of the baseline household weights.
+            - baseline_net_income: Baseline total net income.
+    """
+    baseline_macro = simulation.calculate("macro/baseline")
+    reform_macro = simulation.calculate("macro/reform")
+    reform_balance = reform_macro["gov"]["balance"]
+    baseline_balance = baseline_macro["gov"]["balance"]
+
+    def change(key: str):
+        """Reform value minus baseline value for one balance entry."""
+        return reform_balance[key] - baseline_balance[key]
+
+    tax_change = change("total_tax")
+    state_tax_change = change("total_state_tax")
+    spending_change = change("total_spending")
+
+    household = baseline_macro["household"]
+    # Extra revenue improves the budget; extra spending worsens it.
+    summary = {
+        "budgetary_impact": tax_change - spending_change,
+        "tax_revenue_impact": tax_change,
+        "state_tax_revenue_impact": state_tax_change,
+        "benefit_spending_impact": spending_change,
+        "households": sum(household["demographics"]["household_weight"]),
+        "baseline_net_income": household["finance"]["total_net_income"],
+    }
+
+    if chart:
+        return budget_chart(simulation, summary)
+    return summary
+
+
+def budget_chart(simulation: Simulation, data: dict) -> go.Figure:
+    """Build a waterfall chart of the budgetary impact in billions.
+
+    Args:
+        simulation (Simulation): Its `country` selects the layout: "uk" gets
+            one tax bar and £; anything else is treated as the US, with
+            federal and state tax bars and $.
+        data (dict): The dictionary returned by `general`.
+
+    Returns:
+        go.Figure: The waterfall figure, passed through `format_fig`.
+    """
+    if simulation.country == "uk":
+        currency = "£"
+        x = ["Tax revenues", "Benefit spending", "Budgetary impact"]
+        y = [
+            data["tax_revenue_impact"],
+            -data["benefit_spending_impact"],
+            data["budgetary_impact"],
+        ]
+    else:
+        currency = "$"
+        x = [
+            "Federal tax revenues",
+            "State tax revenues",
+            "Benefit spending",
+            "Budgetary impact",
+        ]
+        y = [
+            data["tax_revenue_impact"] - data["state_tax_revenue_impact"],
+            data["state_tax_revenue_impact"],
+            -data["benefit_spending_impact"],
+            data["budgetary_impact"],
+        ]
+    billions = [value / 1e9 for value in y]
+    waterfall = go.Waterfall(
+        x=x,
+        y=billions,
+        orientation="v",
+        # Every bar is a step except the last, which shows the running total.
+        measure=["relative"] * (len(x) - 1) + ["total"],
+        text=[("+" if b > 0 else "") + str(round(b, 1)) for b in billions],
+        textposition="inside",
+        increasing={"marker": {"color": BLUE}},
+        decreasing={"marker": {"color": DARK_GRAY}},
+        totals={"marker": {"color": BLUE if y[-1] > 0 else DARK_GRAY}},
+        connector={"line": {"color": DARK_GRAY, "width": 2, "dash": "dot"}},
+    )
+    fig = go.Figure(data=[waterfall]).update_layout(
+        title="Budgetary impact by government revenue and spending",
+        xaxis_title="",
+        yaxis_title=f"Budgetary impact ({currency} billions)",
+        yaxis_tickformat=",.0f",
+    )
+    return format_fig(fig)
diff --git a/policyengine/outputs/macro/comparison/detailed_budget.py b/policyengine/outputs/macro/comparison/budget/programs.py
similarity index 96%
rename from policyengine/outputs/macro/comparison/detailed_budget.py
rename to policyengine/outputs/macro/comparison/budget/programs.py
index 3426abf..7d1b9a2 100644
--- a/policyengine/outputs/macro/comparison/detailed_budget.py
+++ b/policyengine/outputs/macro/comparison/budget/programs.py
@@ -1,7 +1,7 @@
 from policyengine import Simulation
 
 
-def detailed_budget(simulation: Simulation):
+def programs(simulation: Simulation):
     """Calculate the detailed budgetary impact of the given simulation.
 
     Args:
diff --git a/policyengine/outputs/macro/comparison/budget/window.py b/policyengine/outputs/macro/comparison/budget/window.py
new file mode 100644
index 0000000..7a8b19c
--- /dev/null
+++ b/policyengine/outputs/macro/comparison/budget/window.py
@@ -0,0 +1,68 @@
+from policyengine import Simulation
+import plotly.graph_objects as go
+import plotly.express as px
+import pandas as pd
+from policyengine.utils.charts import *
+
+
+def window(
+    simulation: Simulation,
+    chart: bool = False,
+    federal_only: bool = False,
+    count_years: int = 1,
+):
+    """Compare reform and baseline budgets across the budget window.
+
+    Args:
+        simulation (Simulation): The simulation to compare.
+        chart (bool): If True, return a bar chart instead of the dictionary.
+        federal_only (bool): With `chart`, plot the federal budget effect.
+        count_years (int): Number of years; only forwarded when it is not 1.
+
+    Returns:
+        dict: `total_budget` and `federal_budget` lists of reform minus
+            baseline values divided by 1e9 (unless `chart` is True).
+    """
+    # Omit the keyword for a single year so the output's own default applies.
+    options = {} if count_years == 1 else {"count_years": count_years}
+    baseline = simulation.calculate(
+        "macro/baseline/gov/budget_window", **options
+    )
+    reform = simulation.calculate("macro/reform/gov/budget_window", **options)
+
+    def effect_in_billions(key: str) -> list:
+        pairs = zip(baseline[key], reform[key])
+        return [(after - before) / 1e9 for before, after in pairs]
+
+    result = {
+        "total_budget": effect_in_billions("total_budget"),
+        "federal_budget": effect_in_billions("total_federal_budget"),
+    }
+
+    if not chart:
+        return result
+    shown = "federal_budget" if federal_only else "total_budget"
+    return budget_window_chart(result[shown])
+
+
+def budget_window_chart(budget_effect, start_year: int = 2025) -> go.Figure:
+    """Draw yearly budget effects as a bar chart, one bar per value.
+
+    Args:
+        budget_effect: Sequence of yearly budget effects, in billions.
+        start_year: Year label of the first bar; later bars count up from
+            it, so the axis always matches the number of values given.
+    """
+    years = [str(start_year + i) for i in range(len(budget_effect))]
+    colors = [BLUE if y > 0 else DARK_GRAY for y in budget_effect]
+    labels = [f"{'+' if y >= 0 else ''}{y:.1f}" for y in budget_effect]
+    bars = go.Bar(
+        x=years, y=budget_effect, marker=dict(color=colors), text=labels
+    )
+    fig = go.Figure(data=[bars]).update_layout(
+        title="Budgetary impact by year",
+        xaxis_title="Year",
+        yaxis_title="Budgetary impact (£ billions)",
+    )
+
+    return format_fig(fig)
diff --git a/policyengine/outputs/macro/comparison/local_areas/parliamentary_constituencies.py b/policyengine/outputs/macro/comparison/local_areas/parliamentary_constituencies.py
index 750bfe9..8a78daf 100644
--- a/policyengine/outputs/macro/comparison/local_areas/parliamentary_constituencies.py
+++ b/policyengine/outputs/macro/comparison/local_areas/parliamentary_constituencies.py
@@ -6,12 +6,12 @@
 
 
 def parliamentary_constituencies(
-        simulation: Simulation, 
-        chart: bool = False, 
-        variable: str = None,
-        aggregator: str = None,
-        relative: bool = None,
-    ) -> dict:
+    simulation: Simulation,
+    chart: bool = False,
+    variable: str = None,
+    aggregator: str = None,
+    relative: bool = None,
+) -> dict:
     if not simulation.options.get("include_constituencies"):
         return {}
 
@@ -45,9 +45,11 @@ def parliamentary_constituencies(
     return result
 
 
-
 def heatmap(
     simulation: Simulation,
+    variable: str = None,
+    aggregator: str = None,
+    relative: bool = None,
 ) -> dict:
     if not simulation.options.get("include_constituencies"):
         return {}
diff --git a/policyengine/outputs/macro/comparison/local_areas/parliamentary_constituencies/data.py b/policyengine/outputs/macro/comparison/local_areas/parliamentary_constituencies/data.py
deleted file mode 100644
index 7fd3fb3..0000000
--- a/policyengine/outputs/macro/comparison/local_areas/parliamentary_constituencies/data.py
+++ /dev/null
@@ -1,27 +0,0 @@
-from policyengine import Simulation
-
-
-def data(simulation: Simulation) -> dict:
-    if not simulation.options.get("include_constituencies"):
-        return {}
-
-    constituency_baseline = simulation.calculate(
-        "macro/baseline/gov/local_areas/parliamentary_constituencies"
-    )
-    constituency_reform = simulation.calculate(
-        "macro/reform/gov/local_areas/parliamentary_constituencies"
-    )
-
-    result = {}
-
-    for constituency in constituency_baseline:
-        result[constituency] = {}
-        for key in constituency_baseline[constituency]:
-            result[constituency][key] = {
-                "change": constituency_reform[constituency][key]
-                - constituency_baseline[constituency][key],
-                "baseline": constituency_baseline[constituency][key],
-                "reform": constituency_reform[constituency][key],
-            }
-
-    return result
diff --git a/policyengine/outputs/macro/comparison/local_areas/parliamentary_constituencies/heatmap.py b/policyengine/outputs/macro/comparison/local_areas/parliamentary_constituencies/heatmap.py
deleted file mode 100644
index 24b8f19..0000000
--- a/policyengine/outputs/macro/comparison/local_areas/parliamentary_constituencies/heatmap.py
+++ /dev/null
@@ -1,127 +0,0 @@
-from policyengine import Simulation
-import pandas as pd
-from policyengine.utils.huggingface import download
-import plotly.express as px
-from policyengine.utils.charts import *
-
-
-def heatmap(
-    simulation: Simulation,
-    variable: str = None,
-    aggregator: str = None,
-    relative: bool = None,
-) -> dict:
-    if not simulation.options.get("include_constituencies"):
-        return {}
-
-    options = {}
-
-    if variable is not None:
-        options["variables"] = [variable]
-    if aggregator is not None:
-        options["aggregator"] = aggregator
-
-    constituency_baseline = simulation.calculate(
-        "macro/baseline/gov/local_areas/parliamentary_constituencies",
-        **options,
-    )
-    constituency_reform = simulation.calculate(
-        "macro/reform/gov/local_areas/parliamentary_constituencies", **options
-    )
-
-    result = {}
-
-    constituency_names_file_path = download(
-        repo="policyengine/policyengine-uk-data",
-        repo_filename="constituencies_2024.csv",
-        local_folder=None,
-        version=None,
-    )
-    constituency_names = pd.read_csv(constituency_names_file_path)
-
-    if variable is None:
-        variable = "household_net_income"
-    if relative is None:
-        relative = True
-
-    for constituency in constituency_baseline:
-        if relative:
-            result[constituency] = (
-                constituency_reform[constituency][variable]
-                / constituency_baseline[constituency][variable]
-                - 1
-            )
-        else:
-            result[constituency] = (
-                constituency_reform[constituency][variable]
-                - constituency_baseline[constituency][variable]
-            )
-
-    x_range = constituency_names["x"].max() - constituency_names["x"].min()
-    y_range = constituency_names["y"].max() - constituency_names["y"].min()
-    # Expand x range to preserve aspect ratio
-    expanded_lower_x_range = -(y_range - x_range) / 2
-    expanded_upper_x_range = x_range - expanded_lower_x_range
-    constituency_names.x = (
-        constituency_names.x - (constituency_names.y % 2 == 0) * 0.5
-    )
-    constituency_names["Relative change"] = (
-        pd.Series(list(result.values()), index=list(result.keys()))
-        .loc[constituency_names["name"]]
-        .values
-    )
-
-    label = simulation.baseline.tax_benefit_system.variables[variable].label
-
-    fig = px.scatter(
-        constituency_names,
-        x="x",
-        y="y",
-        color="Relative change",
-        hover_name="name",
-        title=f"{'Relative change' if relative else 'Change'} in {label} by parliamentary constituency",
-    )
-
-    format_fig(fig)
-
-    # Show hexagons on scatter points
-
-    fig.update_traces(
-        marker=dict(
-            symbol="hexagon", line=dict(width=0, color="lightgray"), size=15
-        )
-    )
-    fig.update_layout(
-        xaxis_tickvals=[],
-        xaxis_title="",
-        yaxis_tickvals=[],
-        yaxis_title="",
-        xaxis_range=[expanded_lower_x_range, expanded_upper_x_range],
-        yaxis_range=[
-            constituency_names["y"].min(),
-            constituency_names["y"].max(),
-        ],
-    ).update_traces(marker_size=10).update_layout(
-        xaxis_range=[30, 85], yaxis_range=[-50, 2]
-    )
-
-    x_min = fig.data[0]["marker"]["color"].min()
-    x_max = fig.data[0]["marker"]["color"].max()
-    max_abs = max(abs(x_min), abs(x_max))
-
-    fig.update_layout(
-        coloraxis=dict(
-            cmin=-max_abs,
-            cmax=max_abs,
-            colorscale=[
-                [0, DARK_GRAY],
-                [0.5, "lightgray"],
-                [1, BLUE],
-            ],
-            colorbar=dict(
-                tickformat=".0%" if relative else ",.0f",
-            ),
-        )
-    )
-
-    return fig
diff --git a/policyengine/outputs/macro/single/gov/budget_window.py b/policyengine/outputs/macro/single/gov/budget_window.py
new file mode 100644
index 0000000..c21b140
--- /dev/null
+++ b/policyengine/outputs/macro/single/gov/budget_window.py
@@ -0,0 +1,47 @@
+from policyengine import Simulation
+
+DEFAULT_COUNT_YEARS = 1
+
+
+def budget_window(simulation: Simulation, count_years: int = None) -> dict:
+    """Calculate yearly government budget totals for the selected simulation.
+
+    Args:
+        simulation (Simulation): Supplies `selected`, `time_period`, `country`.
+        count_years (int): Number of consecutive years to cover, starting at
+            `simulation.time_period`. If None, only that period is used.
+
+    Returns:
+        dict: Per-year lists keyed total_tax, total_spending, total_state_tax,
+            total_budget and total_federal_budget (budget minus state tax).
+    """
+    sim = simulation.selected
+    current_year = simulation.time_period
+    years = [current_year]
+    if count_years is not None:
+        years = list(range(current_year, current_year + count_years))
+
+    def yearly_totals(variable: str) -> list:
+        return [sim.calculate(variable, year).sum() for year in years]
+
+    if simulation.country == "uk":
+        total_tax = yearly_totals("gov_tax")
+        total_spending = yearly_totals("gov_spending")
+        total_state_tax = [0 for _ in years]
+    elif simulation.country == "us":
+        total_tax = yearly_totals("household_tax")
+        total_spending = yearly_totals("household_benefits")
+        total_state_tax = yearly_totals("household_state_income_tax")
+    else:
+        raise ValueError(f"Unsupported country: {simulation.country!r}")
+    total_budget = [t - s for t, s in zip(total_tax, total_spending)]
+    total_federal_budget = [
+        b - s for b, s in zip(total_budget, total_state_tax)
+    ]
+    return {
+        "total_tax": total_tax,
+        "total_spending": total_spending,
+        "total_state_tax": total_state_tax,
+        "total_budget": total_budget,
+        "total_federal_budget": total_federal_budget,
+    }
diff --git a/policyengine/simulation.py b/policyengine/simulation.py
index bc5ee8e..83ead51 100644
--- a/policyengine/simulation.py
+++ b/policyengine/simulation.py
@@ -247,6 +247,10 @@ def _initialise_simulations(self):
             reform=self.baseline,
         )
         self.baseline.default_calculation_period = self.time_period
+
+        if "subsample" in self.options:
+            self.baseline = self.baseline.subsample(self.options["subsample"])
+
         if self.comparison:
             self.reformed = _simulation_type(
                 dataset=self.data if macro else None,
@@ -254,3 +258,8 @@ def _initialise_simulations(self):
                 reform=self.reform,
             )
             self.reformed.default_calculation_period = self.time_period
+
+            if "subsample" in self.options:
+                self.reformed = self.reformed.subsample(
+                    self.options["subsample"]
+                )
diff --git a/policyengine/utils/charts.py b/policyengine/utils/charts.py
index 0051eb7..9923c19 100644
--- a/policyengine/utils/charts.py
+++ b/policyengine/utils/charts.py
@@ -59,6 +59,19 @@ def format_fig(fig: go.Figure, country: str = "uk") -> go.Figure:
             color="black",
         )
     )
+
+    # set template
+    fig.update_layout(
+        template="plotly_white",
+        height=600,
+        width=800,
+        plot_bgcolor="lightgray",  # set background color to light gray
+        paper_bgcolor="lightgray",  # set paper background color to white
+        # No white grid marks
+        xaxis=dict(gridcolor="lightgray", zerolinecolor="lightgray"),
+        yaxis=dict(gridcolor="lightgray", zerolinecolor="lightgray"),
+    )
+
     fig.add_layout_image(
         dict(
             source="https://raw.githubusercontent.com/PolicyEngine/policyengine-app/master/src/images/logos/policyengine/blue.png",
@@ -80,26 +93,11 @@ def format_fig(fig: go.Figure, country: str = "uk") -> go.Figure:
         text=f"Source: PolicyEngine tax-benefit microsimulation model (version {version})",
         xref="paper",
         yref="paper",
-        x=-0.1,
+        x=0,
         y=-0.15,
         showarrow=False,
-    )
-
-    fig.update_traces(
-        # No border
-        marker=dict(line=dict(width=0)),
-    )
-
-    # set template
-    fig.update_layout(
-        template="plotly_white",
-        height=600,
-        width=800,
-        plot_bgcolor="lightgray",  # set background color to light gray
-        paper_bgcolor="lightgray",  # set paper background color to white
-        # No white grid marks
-        xaxis=dict(gridcolor="lightgray", zerolinecolor="lightgray"),
-        yaxis=dict(gridcolor="lightgray", zerolinecolor="lightgray"),
+        xanchor="left",
+        yanchor="bottom",
     )
     # don't show modebar
     fig.update_layout(