Yaml based config and Implementation #50

noman404 · 2024-10-25T09:53:02Z

Implementation that closes #33, resolves #46, closes #47, and fixes #48.

yaml file with variable, implemented, idtl
removed variables that haven't been implemented yet from csv (placeholder variables)
enhance tests

- yaml file with variable, implemented, idtl - removed variables that haven't implemented yet from csv (placeholder variables) - enhance tests

- output based on idtl (2.standard, 5.full) - enhanced yaml file with definition - added test based on idtl

- added more tests

closes PolicyEngine#33 closes PolicyEngine#47 resolved PolicyEngine#46 fix PolicyEngine#48

MaxGhenis

I think you could simplify this with:

import unittest
import os
import subprocess
import pandas as pd
import numpy as np
from pathlib import Path
import platform
import sys


class E2ETest(unittest.TestCase):
    """End-to-end check of the PolicyEngine TAXSIM CLI against TAXSIM35.

    Two tests produce CSV files in ``<project_root>/output`` (one by running
    the PolicyEngine CLI, one by running the platform's TAXSIM35 binary) and
    a third test reads both files and compares selected columns.

    NOTE: ``test_match_both_output`` reads files written by the two
    ``test_generate_*`` tests, so it relies on them having run first
    (unittest's default alphabetical ordering provides that).
    """

    # TAXSIM35 binary to use for each ``platform.system().lower()`` value.
    _TAXSIM_EXECUTABLES = {
        "darwin": "taxsim35-osx.exe",
        "windows": "taxsim35-windows.exe",
        "linux": "taxsim35-unix.exe",
    }

    # Columns compared on every run.
    _STANDARD_COLUMNS = ("year", "fiitax", "siitax")

    # Extra columns compared when any input row has ``idtl == 2``.
    _FULL_ONLY_COLUMNS = (
        "v10",  # state_agi
        "v45",
        "v26",
        "v13",
        "v19",
        "v28",
        "v18",
        "v34",
        "tfica",
    )

    def setUp(self) -> None:
        """Resolve project paths and pick the TAXSIM35 binary for this OS.

        Raises:
            OSError: if the current operating system has no TAXSIM35 binary
                listed in ``_TAXSIM_EXECUTABLES``.
        """
        self.project_root = Path(__file__).parent.parent
        self.taxsim_dir = self.project_root / "resources" / "taxsim35"
        self.output_dir = self.project_root / "output"
        self.output_dir.mkdir(exist_ok=True)

        system = platform.system().lower()
        try:
            self.taxsim_exe = self._TAXSIM_EXECUTABLES[system]
        except KeyError:
            raise OSError(f"Unsupported operating system: {system}") from None

        self.input_file = self.taxsim_dir / "taxsim_input.csv"

    def test_generate_policyengine_taxsim(self) -> None:
        """Run the PolicyEngine CLI on the input CSV and check it writes output."""
        output_file = self.output_dir / "policyengine_taxsim_output.csv"
        cli_script = self.project_root / "policyengine_taxsim" / "cli.py"

        # An argument list (no shell) keeps paths containing spaces intact.
        process = subprocess.run(
            [
                sys.executable,
                str(cli_script),
                str(self.input_file),
                "-o",
                str(output_file),
            ],
            capture_output=True,
            text=True,
        )

        print(f"PolicyEngine TAXSIM CLI output:\n{process.stdout}")
        self.assertEqual(
            process.returncode,
            0,
            f"PolicyEngine TAXSIM CLI failed with error:\n{process.stderr}",
        )

        self.assertTrue(output_file.is_file())
        print(f"Content of {output_file}:")
        print(output_file.read_text())

    def test_generate_taxsim_output(self) -> None:
        """Run the TAXSIM35 binary on the input CSV and check it writes output."""
        output_file = self.output_dir / "taxsim35_output.csv"
        taxsim_path = self.taxsim_dir / self.taxsim_exe

        if platform.system().lower() != "windows":
            # Make the file executable on Unix-like systems
            os.chmod(taxsim_path, 0o755)

        # Feed the input on stdin and send stdout straight to the output file,
        # replacing the former "exe < input > output" shell redirection.
        with open(self.input_file, "rb") as stdin, open(
            output_file, "wb"
        ) as stdout:
            process = subprocess.run(
                [str(taxsim_path)],
                stdin=stdin,
                stdout=stdout,
                stderr=subprocess.PIPE,
                text=True,
            )

        self.assertEqual(
            process.returncode,
            0,
            f"TAXSIM35 failed with error:\n{process.stderr}",
        )

        self.assertTrue(output_file.is_file())
        print(f"Content of {output_file}:")
        print(output_file.read_text())

    @staticmethod
    def _find_mismatched_columns(expected, actual, columns):
        """Return the names in ``columns`` whose values differ between frames.

        Both frames must have the same length and index. A row counts as
        matching when the two values are equal or when both are missing
        (plain ``==`` would report NaN vs NaN as a mismatch).
        """
        mismatched = []
        for col in columns:
            left, right = expected[col], actual[col]
            same = (left == right) | (left.isna() & right.isna())
            if not same.all():
                mismatched.append(col)
        return mismatched

    def test_match_both_output(self) -> None:
        """Compare the TAXSIM35 and PolicyEngine outputs column by column.

        Compares the standard columns, plus the full-output columns when any
        input row has ``idtl == 2``. Fails if the row counts differ, if no
        expected column is present in both files, or if any compared column
        differs.
        """
        taxsim35_csv = pd.read_csv(self.output_dir / "taxsim35_output.csv")
        pe_taxsim_csv = pd.read_csv(
            self.output_dir / "policyengine_taxsim_output.csv"
        )
        input_csv = pd.read_csv(self.input_file)

        print("Input CSV:")
        print(input_csv)
        print("\nTAXSIM35 output:")
        print(taxsim35_csv)
        print("\nPolicyEngine TAXSIM output:")
        print(pe_taxsim_csv)

        # Normalise header case BEFORE intersecting, so a column that differs
        # only in case between the two files is still compared.
        taxsim35_csv.columns = taxsim35_csv.columns.str.lower()
        pe_taxsim_csv.columns = pe_taxsim_csv.columns.str.lower()

        common_columns = sorted(
            set(taxsim35_csv.columns) & set(pe_taxsim_csv.columns)
        )
        # .copy() so the column assignments below act on independent frames.
        taxsim35_csv = taxsim35_csv[common_columns].copy()
        pe_taxsim_csv = pe_taxsim_csv[common_columns].copy()

        self.assertEqual(
            len(taxsim35_csv),
            len(pe_taxsim_csv),
            "TAXSIM35 and PolicyEngine outputs have different row counts",
        )

        # Align rows by record id so values are compared pairwise.
        taxsim35_csv = taxsim35_csv.sort_values("taxsimid").reset_index(
            drop=True
        )
        pe_taxsim_csv = pe_taxsim_csv.sort_values("taxsimid").reset_index(
            drop=True
        )

        # Coerce every column that is numeric on the TAXSIM35 side to numeric
        # on both sides; unparseable values become NaN.
        numeric_columns = taxsim35_csv.select_dtypes(
            include=["number"]
        ).columns
        for col in numeric_columns:
            taxsim35_csv[col] = pd.to_numeric(
                taxsim35_csv[col], errors="coerce"
            )
            pe_taxsim_csv[col] = pd.to_numeric(
                pe_taxsim_csv[col], errors="coerce"
            )

        # Determine which columns to check based on idtl value
        if (input_csv["idtl"] == 2).any():
            columns_to_check = self._STANDARD_COLUMNS + self._FULL_ONLY_COLUMNS
        else:
            columns_to_check = self._STANDARD_COLUMNS

        # Only columns present in both datasets can be compared.
        compared = [col for col in columns_to_check if col in common_columns]
        self.assertTrue(
            compared,
            "None of the expected output columns are present in both files",
        )

        mismatched = self._find_mismatched_columns(
            taxsim35_csv, pe_taxsim_csv, compared
        )
        for col in mismatched:
            print(f"Mismatch in column {col}:")
            print(f"TAXSIM35 values: {taxsim35_csv[col].values}")
            print(f"PolicyEngine values: {pe_taxsim_csv[col].values}")

        self.assertFalse(
            mismatched, f"Columns with mismatches: {mismatched}"
        )


# Run the tests in this module with unittest's command-line runner when the
# file is executed directly as a script (not when it is imported).
if __name__ == "__main__":
    unittest.main()

closes PolicyEngine#33 closes PolicyEngine#47 resolved PolicyEngine#46 resolved PolicyEngine#45 fix PolicyEngine#48

issue fix PolicyEngine#27

4ca90cf

- yaml file with variable, implemented, idtl - removed variables that haven't implemented yet from csv (placeholder variables) - enhance tests

noman404 requested a review from MaxGhenis October 25, 2024 09:53

issue fix PolicyEngine#27

94f3f82

- output based on idtl (2.standard, 5.full) - enhanced yaml file with definition - added test based on idtl

noman404 marked this pull request as ready for review October 25, 2024 12:53

noman404 changed the title ~~issue fix #27~~ issue fix #33 Oct 25, 2024

noman404 added 2 commits October 26, 2024 00:31

- disable property tax

edccc31

- added more tests

- added more tests

52f2b9f

noman404 changed the title ~~issue fix #33~~ Yaml based config and Implementation of issue fixes #33, #46, #47 #48 Oct 26, 2024

noman404 changed the title ~~Yaml based config and Implementation of issue fixes #33, #46, #47 #48~~ Yaml based config and Implementation Oct 27, 2024

implemented all the generated non-zero variables in e2e tests

2761430

closes PolicyEngine#33 closes PolicyEngine#47 resolved PolicyEngine#46 fix PolicyEngine#48

MaxGhenis requested changes Oct 28, 2024

View reviewed changes

noman404 requested a review from MaxGhenis October 29, 2024 00:15

improve test code as CR

aed1787

closes PolicyEngine#33 closes PolicyEngine#47 resolved PolicyEngine#46 resolved PolicyEngine#45 fix PolicyEngine#48

MaxGhenis approved these changes Oct 29, 2024

View reviewed changes

MaxGhenis merged commit a3f6be0 into PolicyEngine:main Oct 29, 2024
9 checks passed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Yaml based config and Implementation #50

Yaml based config and Implementation #50

noman404 commented Oct 25, 2024 •

edited

Loading

MaxGhenis left a comment •

edited

Loading

Yaml based config and Implementation #50

Yaml based config and Implementation #50

Conversation

noman404 commented Oct 25, 2024 • edited Loading

MaxGhenis left a comment • edited Loading

Choose a reason for hiding this comment

noman404 commented Oct 25, 2024 •

edited

Loading

MaxGhenis left a comment •

edited

Loading