Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Yaml based config and Implementation #50

Merged
merged 6 commits into from
Oct 29, 2024

Conversation

noman404
Copy link
Collaborator

@noman404 noman404 commented Oct 25, 2024

Implementation for the following issues: closes #33, resolves #46, closes #47, fixes #48.

  • yaml file with variable, implemented, idtl
  • removed variables that haven't been implemented yet from csv (placeholder variables)
  • enhance tests

- yaml file with variable, implemented, idtl
- removed variables that haven't been implemented yet from csv (placeholder variables)
- enhance tests
- output based on idtl (2.standard, 5.full)
- enhanced yaml file with definition
- added test based on idtl
@noman404 noman404 marked this pull request as ready for review October 25, 2024 12:53
@noman404 noman404 changed the title issue fix #27 issue fix #33 Oct 25, 2024
@noman404 noman404 changed the title issue fix #33 Yaml based config and Implementation of issue fixes #33, #46, #47 #48 Oct 26, 2024
@noman404 noman404 changed the title Yaml based config and Implementation of issue fixes #33, #46, #47 #48 Yaml based config and Implementation Oct 27, 2024
Copy link
Contributor

@MaxGhenis MaxGhenis left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you could simplify this with:

import unittest
import os
import subprocess
import pandas as pd
import numpy as np
from pathlib import Path
import platform
import sys


class E2ETest(unittest.TestCase):
    """End-to-end check of the PolicyEngine TAXSIM CLI against TAXSIM35.

    Two tests generate output CSVs from the same input file (one through the
    PolicyEngine CLI, one through the bundled TAXSIM35 executable) and a third
    test compares the two outputs column by column.

    ``test_match_both_output`` reads the files written by the two generate
    tests; unittest runs test methods in alphabetical order, so the generate
    tests run first.
    """

    def setUp(self) -> None:
        """Resolve project paths and select the TAXSIM35 binary for this OS.

        Raises:
            OSError: if the current operating system has no bundled binary.
        """
        self.project_root = Path(__file__).parent.parent
        self.taxsim_dir = self.project_root / "resources" / "taxsim35"
        self.output_dir = self.project_root / "output"
        self.output_dir.mkdir(exist_ok=True)

        # Determine the correct TAXSIM executable based on the OS
        system = platform.system().lower()
        if system == "darwin":
            self.taxsim_exe = "taxsim35-osx.exe"
        elif system == "windows":
            self.taxsim_exe = "taxsim35-windows.exe"
        elif system == "linux":
            self.taxsim_exe = "taxsim35-unix.exe"
        else:
            raise OSError(f"Unsupported operating system: {system}")

        self.input_file = self.taxsim_dir / "taxsim_input.csv"

    @staticmethod
    def _require_success(process, label: str) -> None:
        """Raise if *process* exited non-zero, printing its stderr first.

        Args:
            process: the ``subprocess.CompletedProcess`` to inspect.
            label: human-readable tool name used in the messages.

        Raises:
            Exception: if ``process.returncode`` is not zero.
        """
        if process.returncode != 0:
            print(f"{label} failed with error:\n{process.stderr}")
            raise Exception(f"{label} failed: {process.returncode}")

    def _print_file(self, path: Path) -> None:
        """Assert that *path* exists as a file and print its content."""
        self.assertTrue(path.is_file())
        print(f"Content of {path}:")
        print(path.read_text())

    def test_generate_policyengine_taxsim(self):
        """Run the PolicyEngine TAXSIM CLI on the input file and keep its CSV."""
        output_file = self.output_dir / "policyengine_taxsim_output.csv"

        # An argument list (no shell) keeps paths containing spaces or shell
        # metacharacters intact.
        cmd = [
            sys.executable,
            str(self.project_root / "policyengine_taxsim" / "cli.py"),
            str(self.input_file),
            "-o",
            str(output_file),
        ]
        process = subprocess.run(cmd, capture_output=True, text=True)

        print(f"PolicyEngine TAXSIM CLI output:\n{process.stdout}")
        self._require_success(process, "PolicyEngine TAXSIM CLI")
        self._print_file(output_file)

    def test_generate_taxsim_output(self):
        """Run the TAXSIM35 executable on the input file and keep its CSV."""
        output_file = self.output_dir / "taxsim35_output.csv"

        taxsim_path = self.taxsim_dir / self.taxsim_exe

        if platform.system().lower() != "windows":
            # Make the file executable on Unix-like systems
            os.chmod(taxsim_path, 0o755)

        # TAXSIM35 reads the input on stdin and writes the result to stdout;
        # wire both to files directly instead of relying on shell redirection.
        with open(self.input_file, "rb") as stdin_file, open(
            output_file, "wb"
        ) as stdout_file:
            process = subprocess.run(
                [str(taxsim_path)],
                stdin=stdin_file,
                stdout=stdout_file,
                stderr=subprocess.PIPE,
                text=True,
            )

        self._require_success(process, "TAXSIM35")
        self._print_file(output_file)

    def test_match_both_output(self):
        """Compare the TAXSIM35 and PolicyEngine outputs column by column.

        The standard columns are always compared; the extended list is used
        when any input row has ``idtl == 2``. Values are compared with exact
        equality after sorting both outputs by ``taxsimid``.
        """
        taxsim35_csv = pd.read_csv(self.output_dir / "taxsim35_output.csv")
        pe_taxsim_csv = pd.read_csv(
            self.output_dir / "policyengine_taxsim_output.csv"
        )
        input_csv = pd.read_csv(self.input_file)

        print("Input CSV:")
        print(input_csv)
        print("\nTAXSIM35 output:")
        print(taxsim35_csv)
        print("\nPolicyEngine TAXSIM output:")
        print(pe_taxsim_csv)

        # Normalise the header case BEFORE intersecting, so that the
        # lower-case names checked below refer to the same columns.
        taxsim35_csv.columns = taxsim35_csv.columns.str.lower()
        pe_taxsim_csv.columns = pe_taxsim_csv.columns.str.lower()

        # Restrict both DataFrames to the columns they share; copy so the
        # numeric conversion below does not write into a slice.
        common_columns = sorted(
            set(taxsim35_csv.columns) & set(pe_taxsim_csv.columns)
        )
        taxsim35_csv = taxsim35_csv[common_columns].copy()
        pe_taxsim_csv = pe_taxsim_csv[common_columns].copy()

        # Sort both DataFrames by taxsimid
        taxsim35_csv = taxsim35_csv.sort_values("taxsimid").reset_index(
            drop=True
        )
        pe_taxsim_csv = pe_taxsim_csv.sort_values("taxsimid").reset_index(
            drop=True
        )
        input_csv = input_csv.sort_values("taxsimid").reset_index(drop=True)

        # Convert numeric columns to numbers on both sides
        numeric_columns = taxsim35_csv.select_dtypes(
            include=["number"]
        ).columns
        for col in numeric_columns:
            taxsim35_csv[col] = pd.to_numeric(
                taxsim35_csv[col], errors="coerce"
            )
            pe_taxsim_csv[col] = pd.to_numeric(
                pe_taxsim_csv[col], errors="coerce"
            )

        # Define columns to check based on output type
        standard_columns = ["year", "fiitax", "siitax"]
        full_columns = standard_columns + [
            "v10",  # state_agi
            "v45",
            "v26",
            "v13",
            "v19",
            "v28",
            "v18",
            "v34",
            "tfica",
        ]

        # Determine which columns to check based on idtl value
        if (input_csv["idtl"] == 2).any():
            columns_to_check = full_columns
        else:
            columns_to_check = standard_columns

        # Compare every relevant column that exists in both datasets
        shared = set(common_columns)
        comparison_results = {}
        for col in columns_to_check:
            if col not in shared:
                continue
            matches = bool((taxsim35_csv[col] == pe_taxsim_csv[col]).all())
            comparison_results[col] = matches
            if not matches:
                print(f"Mismatch in column {col}:")
                print(f"TAXSIM35 values: {taxsim35_csv[col].values}")
                print(f"PolicyEngine values: {pe_taxsim_csv[col].values}")

        # Without this guard an empty comparison would pass vacuously.
        self.assertTrue(
            comparison_results,
            f"None of the expected columns {columns_to_check} are present "
            f"in both outputs (shared columns: {common_columns})",
        )

        # Assert all columns match
        mismatched = [
            col for col, matched in comparison_results.items() if not matched
        ]
        self.assertTrue(
            not mismatched, f"Columns with mismatches: {mismatched}"
        )


# Allow running this test module directly as a script, in addition to
# discovery through a test runner.
if __name__ == "__main__":
    unittest.main()

@MaxGhenis MaxGhenis merged commit a3f6be0 into PolicyEngine:main Oct 29, 2024
9 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
2 participants