diff --git a/.gitignore b/.gitignore index 7b80a12..8fb592f 100644 --- a/.gitignore +++ b/.gitignore @@ -168,5 +168,4 @@ cython_debug/ .idea/ input/* output.csv -taxsim_input.csv -output/* \ No newline at end of file +taxsim_input.csv \ No newline at end of file diff --git a/README.md b/README.md index 3629dc4..f22449a 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,13 @@ TAXSIM emulator using the PolicyEngine US federal and state tax calculator ## How to use the emulator ## The emulator takes a .csv file in the form of a csv. This is the same form of input that Taxsim-35 takes. -In terminal, run +**Open your terminal in the parent directory** + +1. Install necessary dependencies + +`pip install -e .` + +2. to execute the simulation, run `python policyengine_taxsim/cli.py resources/taxsim35/taxsim_input.csv ` diff --git a/TaxsimInputReader.py b/TaxsimInputReader.py deleted file mode 100644 index 2a6367f..0000000 --- a/TaxsimInputReader.py +++ /dev/null @@ -1,221 +0,0 @@ -import pandas as pd - -import numpy as np - - -# TO DO: add data validation for each row. Need same number of data points in each row - -# class reads taxsim input and outputs a list of policy engine situations -class InputReader: - def __init__(self, file_path): - self.file_path = file_path - self.df = self.read_csv() - self.situations = self.get_situations() - self.output_level = self.get_output_level() - - def read_csv(self): - return pd.read_csv(self.file_path, index_col=False) - - def get_filing_status(self, mstat): - filing_status_map = { - 1: "single", - 2: "married_jointly", - 6: "married_separately", - 8: "dependent_child", - 5: "head_of_household" - } - return filing_status_map.get(mstat, f"Unknown filing status: {mstat}") - - def get_state_code(self, state_number): - state_mapping = { - 1: "AL", 2: "AK", 3: "AZ", 4: "AR", 5: "CA", 6: "CO", 7: "CT", 8: "DE", 9: "DC", 10: "FL", - 11: "GA", 12: "HI", 13: "ID", 14: "IL", 15: "IN", 16: "IA", 17: "KS", 18: "KY", 19: "LA", - 20: "ME", 21: "MD", 22: "MA", 23: "MI", 24: "MN", 25: "MS", 26: "MO", 27: "MT", 28: "NE", - 29: "NV", 30: "NH", 31: "NJ", 32: "NM", 33: "NY", 34: "NC", 35: "ND", 36: "OH", 37: "OK", - 38: "OR", 39: "PA", 40: "RI", 41: "SC", 42: "SD", 43: "TN", 44: "TX", 45: "UT", 46: "VT", - 47: "VA", 48: "WA", 49: "WV", 50: "WI", 51: "WY" - } - return state_mapping.get(state_number, "Invalid state number") - - # Default situation from policy engine - def create_default_situation(self, year): - return { - "people": { - "you": { - "age": {year: 40}, - }, - }, - "families": { - "your family": { - "members": ["you"], - } - }, - "marital_units": { - "your marital unit": { - "members": ["you"], - }, - }, - "tax_units": { - "your tax unit": { - "members": ["you"], - }, - }, - "spm_units": { - "your household": { - "members": ["you"], - } - }, - "households": { - "your household": { - "members": ["you"], - } - } - } - - # iterate through the input file and add the input variable information to the situation - def add_variables(self, situation, row, year): - # list of working income variables - income_variable_map = { - "pwages": "employment_income", - "swages": "employment_income", - "psemp": "self_employment_income_last_year", - "ssemp": "self_employment_income_last_year", - "dividends": "qualified_dividend_income", - "intrec": "taxable_interest_income", - "stcg": "short_term_capital_gains", - "ltcg": "long_term_capital_gains", - "pui": "unemployment_compensation", - "sui": "unemployment_compensation", - "proptax": "real_estate_taxes" - } - - primary_variables = ["pwages", "psemp", "dividends", "intrec", "stcg", "ltcg", "pui", "proptax"] - spouse_variables = ["swages", "ssemp", "sui"] - - primary_taxpayer = "you" - spouse_taxpayer = "your partner" - - for taxsim_var, situation_var in income_variable_map.items(): - value = row.get(taxsim_var, None) - if pd.notna(value): - if taxsim_var in primary_variables: - if primary_taxpayer not in situation["people"]: - situation["people"][primary_taxpayer] = {} - situation["people"][primary_taxpayer].setdefault(situation_var, {})[year] = value - elif taxsim_var in spouse_variables: - if spouse_taxpayer not in situation["people"]: - situation["people"][spouse_taxpayer] = { - "age": {year: 40}, # Default age, should be updated from row if available - } - situation["people"][spouse_taxpayer].setdefault(situation_var, {})[year] = value - else: - situation["people"][primary_taxpayer].setdefault(situation_var, {})[year] = value - - # add the spouse and their age/income to correct units in situation - def add_spouse(self, situation, row, year): - spouse_person_id = "your partner" - situation["people"][spouse_person_id] = { - "age": {year: int(row.get('sage', 40))}, - "employment_income": {year: int(row.get('swages', 0))}, - "is_tax_unit_spouse": {year: True}, - } - # Update units with spouse - units_to_update = { - "families": "your family", - "marital_units": "your marital unit", - "tax_units": "your tax unit", - "spm_units": "your household", - "households": "your household" - } - for unit, unit_name in units_to_update.items(): - situation[unit][unit_name].setdefault("members", []).append(spouse_person_id) - - # add dependents and their ages to the correct units - def add_dependents(self, situation, row, year, depx): - ordinal = { - 1: "first", - 2: "second", - 3: "third", - 4: "fourth", - 5: "fifth" - } - # taxsim limits the number of dependents at 3. - for i in range(1, min(depx, 3) + 1): - dependent_id = ordinal.get(i) - dependent_name = "your " + dependent_id + " dependent" - dependent_age = int(row.get(f'age{i}', 10)) - situation["people"][dependent_name] = { - "age": {year: dependent_age}, - } - situation["families"]["your family"]["members"].append(dependent_name) - situation["spm_units"]["your household"]["members"].append(dependent_name) - situation["households"]["your household"]["members"].append(dependent_name) - situation["tax_units"]["your tax unit"]["members"].append(dependent_name) - - # create the situation - def create_situation(self, row): - year = str(convert_to_int(row['year'])) - state = self.get_state_code(row['state']) - mstat = row['mstat'] - depx_value = row.get('depx') - - if pd.notna(depx_value): - depx = int(depx_value) - else: - depx = 0 - - situation = self.create_default_situation(year) - - # Add primary taxpayer details - main_person_id = "you" - situation["people"][main_person_id]["age"] = {year: int(row.get('page', 40))} - situation["people"][main_person_id]["employment_income"] = {year: int(row.get('pwages', 0))} - - # Add state to the household - situation["households"]["your household"]["state_name"] = {year: state} - - if mstat == 2: - self.add_spouse(situation, row, year) - - if depx > 0: - self.add_dependents(situation, row, year, depx) - - # Add additional variables - self.add_variables(situation, row, year) - - return situation - - # return the output level based on input idtl value - def get_output_level(self): - idtl_values = self.df['idtl'].unique() - if 5 in idtl_values: - return "text_descriptions" - elif 2 in idtl_values: - return "full" - elif 0 in idtl_values: - return "standard" - else: - raise ValueError("Unknown idtl value") - - def get_situations(self): - situations = [] - for index, row in self.df.iterrows(): - situation = self.create_situation(row) - situation = convert_to_int(situation) - situations.append(situation) - return situations - - # Convert int64 and floats to int in the situation - - -def convert_to_int(obj): - if isinstance(obj, np.int64): - return int(obj) - elif isinstance(obj, dict): - return {key: convert_to_int(value) for key, value in obj.items()} - elif isinstance(obj, list): - return [convert_to_int(element) for element in obj] - elif isinstance(obj, float): - return int(obj) - else: - return obj diff --git a/input_mapper.py b/input_mapper.py deleted file mode 100644 index 967de98..0000000 --- a/input_mapper.py +++ /dev/null @@ -1,494 +0,0 @@ -from policyengine_us import Simulation -from policyengine_us import parameters -from policyengine_us.model_api import * - -import pandas as pd - -import argparse - -import os - -from TaxsimInputReader import InputReader - -import importlib.metadata - -# Example simulation imported from policyengine.org -# Single 40 years old parent with $100,000 income, 2 kids (10 years old), living in CA -situation1 = { - "people": { - "you": { - "age": { - "2024": 40 - }, - "employment_income": { - "2024": 100000 - } - }, - "your first dependent": { - "age": { - "2024": 10 - }, - "employment_income": { - "2024": 0 - } - }, - "your second dependent": { - "age": { - "2024": 10 - }, - "employment_income": { - "2024": 0 - } - } - }, - "families": { - "your family": { - "members": [ - "you", - "your first dependent", - "your second dependent" - ] - } - }, - "marital_units": { - "your marital unit": { - "members": [ - "you" - ] - }, - "your first dependent's marital unit": { - "members": [ - "your first dependent" - ], - "marital_unit_id": { - "2024": 1 - } - }, - "your second dependent's marital unit": { - "members": [ - "your second dependent" - ], - "marital_unit_id": { - "2024": 2 - } - } - }, - "tax_units": { - "your tax unit": { - "members": [ - "you", - "your first dependent", - "your second dependent" - ] - } - }, - "spm_units": { - "your household": { - "members": [ - "you", - "your first dependent", - "your second dependent" - ] - } - }, - "households": { - "your household": { - "members": [ - "you", - "your first dependent", - "your second dependent" - ], - "state_name": { - "2024": "CA" - } - } - } -} - -situation2 = { - "people": { - "you": { - "age": { - "2024": 40 - }, - "employment_income": { - "2024": 100000 - } - }, - "your partner": { - "age": { - "2024": 40 - }, - "employment_income": { - "2024": 50000 - } - } - }, - "families": { - "your family": { - "members": [ - "you", - "your partner" - ] - } - }, - "marital_units": { - "your marital unit": { - "members": [ - "you", - "your partner" - ] - } - }, - "tax_units": { - "your tax unit": { - "members": [ - "you", - "your partner" - ] - } - }, - "spm_units": { - "your household": { - "members": [ - "you", - "your partner" - ] - } - }, - "households": { - "your household": { - "members": [ - "you", - "your partner" - ], - "state_name": { - "2024": "AL" - } - } - } -} - - -def read_input_file(input_file): - reader = InputReader(input_file) - return reader.situations - - -# input a list of situations and convert each situation into a simulation object -def make_simulation(list_of_households): - list_of_simulations = [] - for situation in list_of_households: - list_of_simulations.append(Simulation(situation=situation, )) - return list_of_simulations - - -# Return true if the string is a date -def is_date(string): - try: - pd.to_datetime(string, format='%Y') - return True - except Exception: - return False - - -# Return true if the string can create a StateCode instance -def is_state_code(string): - try: - StateCode[string] - return True - except Exception: - return False - - -# Get the user's state -def get_state(situation): - year_and_state = list(situation["households"]["your household"]["state_name"].items()) - for item in year_and_state: - for string in item: - if is_state_code(string): - return string - - -# Get the tax filing year -def get_year(situation): - year_and_state = list(situation["households"]["your household"]["state_name"].items()) - for item in year_and_state: - for string in item: - if is_date(string): - return string - - -# Returns the itemized_deduction function for the user's state -def state_itemized_deductions(situation): - state = get_state(situation).lower() - return state + "_itemized_deductions" - - -# Returns the standard_deduction function for the user's state -def state_standard_deduction(situation): - state = get_state(situation).lower() - return state + "_standard_deduction" - - -# Returns the function that computes Child and Dependent Care Credit for the user's state -def state_child_care_credit(situation): - state = get_state(situation).lower() - return state + "_cdcc" - - -# Returns the function that computes the user's AGI based on filing state -def state_adjusted_gross_income(situation): - state = get_state(situation).lower() - return state + "_agi" - - -# Returns the function that computes the user's state taxable income -def state_taxable_income(situation): - state = get_state(situation).lower() - return state + "_taxable_income" - - -# Returns the function that computes state income tax -def state_income_tax(situation): - state = get_state(situation).lower() - return state + "_income_tax" - - -# Return the function that computes total state exemptions -def state_exemptions(situation): - state = get_state(situation).lower() - # try to calculate state_exemption, if error, return 0 --> NEED TO ADD Feature - return state + "_exemptions" - - -def state_agi(situation): - state = get_state(situation).lower() - return state + "_agi" - - -def placeholder(): - return "placeholder" - - -# List of variables that aren't mapped in Policy Engine -placeholder_variables = ["fica", "frate", "srate", "ficar", "tfica", "exemption_phaseout", "deduction_phaseout", - "income_tax19", "exemption_surtax", "general_tax_credit", "FICA", "state_rent_expense", - "state_property_tax_credit", "state_eic", "state_total_credits", "state_bracket_rate", - "state_exemptions", "state_cdcc"] - -# list of variables that match Taxsim output variables -variables = ["get_year", "get_state", "income_tax", "state_income_tax", "fica", "frate", "srate", "ficar", "tfica", - "adjusted_gross_income", "tax_unit_taxable_unemployment_compensation", "tax_unit_taxable_social_security", - "basic_standard_deduction", "exemptions", "exemption_phaseout", "deduction_phaseout", - "taxable_income_deductions", - "taxable_income", "income_tax19", "exemption_surtax", "general_tax_credit", "ctc", "refundable_ctc", - "cdcc", - "eitc", "amt_income", "alternative_minimum_tax", "income_tax_before_refundable_credits", "FICA", - "household_net_income", - "state_rent_expense", "state_agi", "state_exemptions", "state_standard_deduction", - "state_itemized_deductions", - "state_taxable_income", "state_property_tax_credit", "state_child_care_credit", "state_eic", - "state_total_credits", - "state_bracket_rate", "self_employment_income", "net_investment_income_tax", "employee_medicare_tax", - "rrc_cares"] - -# list of dictiionaries where each Policy Engine variable is mapped to the Taxsim name. -# Booleans indicate whether the variable is a placeholder, a local variable, or a local variable that doesn't return a function (only get_year and state) -# list of variables mapped to taxsim "2" input (full variables) -full_variables = [ - {'variable': 'get_year', 'taxsim_name': 'year', 'is_placeholder': False, 'is_local': True, 'is_local_getter': True}, - {'variable': 'get_state', 'taxsim_name': 'state', 'is_placeholder': False, 'is_local': True, - 'is_local_getter': True}, - {'variable': 'income_tax', 'taxsim_name': 'fiitax', 'is_placeholder': False, 'is_local': False, - 'is_local_getter': False}, - {'variable': 'state_income_tax', 'taxsim_name': 'siitax', 'is_placeholder': False, 'is_local': True, - 'is_local_getter': False}, - {'variable': 'fica', 'taxsim_name': 'fica', 'is_placeholder': True, 'is_local': False, 'is_local_getter': False}, - {'variable': 'frate', 'taxsim_name': 'frate', 'is_placeholder': True, 'is_local': False, 'is_local_getter': False}, - {'variable': 'srate', 'taxsim_name': 'srate', 'is_placeholder': True, 'is_local': False, 'is_local_getter': False}, - {'variable': 'ficar', 'taxsim_name': 'ficar', 'is_placeholder': True, 'is_local': False, 'is_local_getter': False}, - {'variable': 'tfica', 'taxsim_name': 'tfica', 'is_placeholder': True, 'is_local': False, 'is_local_getter': False}, - {'variable': 'adjusted_gross_income', 'taxsim_name': 'v10', 'is_placeholder': False, 'is_local': False, - 'is_local_getter': False}, - {'variable': 'tax_unit_taxable_unemployment_compensation', 'taxsim_name': 'v11', 'is_placeholder': False, - 'is_local': False, 'is_local_getter': False}, - {'variable': 'tax_unit_taxable_social_security', 'taxsim_name': 'v12', 'is_placeholder': False, 'is_local': False, - 'is_local_getter': False}, - {'variable': 'basic_standard_deduction', 'taxsim_name': 'v13', 'is_placeholder': False, 'is_local': False, - 'is_local_getter': False}, - {'variable': 'exemptions', 'taxsim_name': 'v14', 'is_placeholder': False, 'is_local': False, - 'is_local_getter': False}, - {'variable': 'exemption_phaseout', 'taxsim_name': 'v15', 'is_placeholder': True, 'is_local': False, - 'is_local_getter': False}, - {'variable': 'deduction_phaseout', 'taxsim_name': 'v16', 'is_placeholder': True, 'is_local': False, - 'is_local_getter': False}, - {'variable': 'taxable_income_deductions', 'taxsim_name': 'v17', 'is_placeholder': False, 'is_local': False, - 'is_local_getter': False}, - {'variable': 'taxable_income', 'taxsim_name': 'v18', 'is_placeholder': False, 'is_local': False, - 'is_local_getter': False}, - {'variable': 'income_tax19', 'taxsim_name': 'v19', 'is_placeholder': True, 'is_local': False, - 'is_local_getter': False}, - {'variable': 'exemption_surtax', 'taxsim_name': 'v20', 'is_placeholder': True, 'is_local': False, - 'is_local_getter': False}, - {'variable': 'general_tax_credit', 'taxsim_name': 'v21', 'is_placeholder': True, 'is_local': False, - 'is_local_getter': False}, - {'variable': 'ctc', 'taxsim_name': 'v22', 'is_placeholder': False, 'is_local': False, 'is_local_getter': False}, - {'variable': 'refundable_ctc', 'taxsim_name': 'v23', 'is_placeholder': False, 'is_local': False, - 'is_local_getter': False}, - {'variable': 'cdcc', 'taxsim_name': 'v24', 'is_placeholder': False, 'is_local': False, 'is_local_getter': False}, - {'variable': 'eitc', 'taxsim_name': 'v25', 'is_placeholder': False, 'is_local': False, 'is_local_getter': False}, - {'variable': 'amt_income', 'taxsim_name': 'v26', 'is_placeholder': False, 'is_local': False, - 'is_local_getter': False}, - {'variable': 'alternative_minimum_tax', 'taxsim_name': 'v27', 'is_placeholder': False, 'is_local': False, - 'is_local_getter': False}, - {'variable': 'income_tax_before_refundable_credits', 'taxsim_name': 'v28', 'is_placeholder': False, - 'is_local': False, 'is_local_getter': False}, - {'variable': 'FICA', 'taxsim_name': 'v29', 'is_placeholder': True, 'is_local': True, 'is_local_getter': False}, - {'variable': 'household_net_income', 'taxsim_name': 'v30', 'is_placeholder': False, 'is_local': False, - 'is_local_getter': False}, - {'variable': 'state_rent_expense', 'taxsim_name': 'v31', 'is_placeholder': True, 'is_local': False, - 'is_local_getter': False}, - {'variable': 'state_agi', 'taxsim_name': 'v32', 'is_placeholder': False, 'is_local': True, - 'is_local_getter': False}, - {'variable': 'state_exemptions', 'taxsim_name': 'v33', 'is_placeholder': True, 'is_local': True, - 'is_local_getter': False}, - {'variable': 'state_standard_deduction', 'taxsim_name': 'v34', 'is_placeholder': False, 'is_local': True, - 'is_local_getter': False}, - {'variable': 'state_itemized_deductions', 'taxsim_name': 'v35', 'is_placeholder': False, 'is_local': True, - 'is_local_getter': False}, - {'variable': 'state_taxable_income', 'taxsim_name': 'v36', 'is_placeholder': False, 'is_local': True, - 'is_local_getter': False}, - {'variable': 'state_property_tax_credit', 'taxsim_name': 'v37', 'is_placeholder': True, 'is_local': False, - 'is_local_getter': False}, - {'variable': 'state_child_care_credit', 'taxsim_name': 'v38', 'is_placeholder': True, 'is_local': True, - 'is_local_getter': False}, - {'variable': 'state_eic', 'taxsim_name': 'v39', 'is_placeholder': True, 'is_local': False, - 'is_local_getter': False}, - {'variable': 'state_total_credits', 'taxsim_name': 'v40', 'is_placeholder': True, 'is_local': False, - 'is_local_getter': False}, - {'variable': 'state_bracket_rate', 'taxsim_name': 'v41', 'is_placeholder': True, 'is_local': False, - 'is_local_getter': False}, - {'variable': 'self_employment_income', 'taxsim_name': 'v42', 'is_placeholder': False, 'is_local': False, - 'is_local_getter': False}, - {'variable': 'net_investment_income_tax', 'taxsim_name': 'v43', 'is_placeholder': False, 'is_local': False, - 'is_local_getter': False}, - {'variable': 'employee_medicare_tax', 'taxsim_name': 'v44', 'is_placeholder': False, 'is_local': False, - 'is_local_getter': False}, - {'variable': 'rrc_cares', 'taxsim_name': 'v45', 'is_placeholder': False, 'is_local': False, - 'is_local_getter': False} -] - -# variables mapped to taxsim "0" input (standard) -standard_variables = [ - {'variable': 'get_year', 'taxsim_name': 'year', 'is_placeholder': False, 'is_local': True, 'is_local_getter': True}, - {'variable': 'get_state', 'taxsim_name': 'state', 'is_placeholder': False, 'is_local': True, - 'is_local_getter': True}, - {'variable': 'income_tax', 'taxsim_name': 'fiitax', 'is_placeholder': False, 'is_local': False, - 'is_local_getter': False}, - {'variable': 'state_income_tax', 'taxsim_name': 'siitax', 'is_placeholder': False, 'is_local': True, - 'is_local_getter': False}, - {'variable': 'fica', 'taxsim_name': 'fica', 'is_placeholder': True, 'is_local': False, 'is_local_getter': False}, - {'variable': 'frate', 'taxsim_name': 'frate', 'is_placeholder': True, 'is_local': False, 'is_local_getter': False}, - {'variable': 'srate', 'taxsim_name': 'srate', 'is_placeholder': True, 'is_local': False, 'is_local_getter': False}, - {'variable': 'ficar', 'taxsim_name': 'ficar', 'is_placeholder': True, 'is_local': False, 'is_local_getter': False}, - {'variable': 'tfica', 'taxsim_name': 'tfica', 'is_placeholder': True, 'is_local': False, 'is_local_getter': False}, -] - - -# Calculate the variables based on the user's information and save them to a dataframe - -# input a list of simulations, a list of households, and a variable_dict. -# variable dict will be switched to either 0, 2, 5 to correspond with taxsim inputs --> to be implemented - -# separate iteration into one single household output -def single_household(household): - row = [] - - simulation = Simulation(situation=household, ) - - variable_dict = full_variables - - for variable_info in variable_dict: - variable = variable_info['variable'] - taxsim_name = variable_info['taxsim_name'] - is_placeholder = variable_info['is_placeholder'] - is_local = variable_info['is_local'] - is_local_getter = variable_info["is_local_getter"] - - # if the variable is a placeholder, append "placeholder" - if is_placeholder: - row.append(placeholder()) - else: - # if the variable is local, return the value of the local function (returns a to policyenginge function name) - if is_local: - function = globals()[variable] - result = function(household) - # if the variable is a local getter, just return the value of the local function - if is_local_getter: - calculation = result - # otherwise, run policy engine calculation using the function name - else: - calculation = simulation.calculate(result) - # if the variable is not local, just run policy engine calculation - else: - calculation = simulation.calculate(variable) - - row.append(calculation) - - return row - - -# second function that calls the single household for each in the list -def multiple_households(list_of_households): - output = [] - - list_of_simulations = make_simulation(list_of_households) - - for simulation, household in zip(list_of_simulations, list_of_households): - row = single_household(household) - output.append(row) - - # Create DataFrame from the output with taxsim_names as columns - return output - - -def make_dataframe(input_file, variable_dict, is_multiple_households: bool): - if not is_multiple_households: - household = input_file[0] - output = [single_household(household)] - df = pd.DataFrame(output, columns=[var['taxsim_name'] for var in variable_dict], - index=pd.RangeIndex(start=1, stop=len(output) + 1, name='taxsimid')) - return df - else: - output = multiple_households(input_file) - df = pd.DataFrame(output, columns=[var['taxsim_name'] for var in variable_dict], - index=pd.RangeIndex(start=1, stop=len(output) + 1, name='taxsimid')) - return df - - -# return true if the input file contains more than one household -def is_multiple_households(list): - return len(list) > 1 - - -# run main with an input file to execute the methods in the correct order -def main(input_file): - print("running script") - - list_of_households = read_input_file(input_file) - print(list_of_households) - variable_dict = full_variables # going to use a condition to set the correct variable list depending on the input - - output = make_dataframe(list_of_households, variable_dict, is_multiple_households(list_of_households)) - - output.to_csv('output.csv', index=True) - print("script finished") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Process input file and generate output.') - parser.add_argument('input_file', type=str, help='Path to the input CSV file') - args = parser.parse_args() - - main(args.input_file) diff --git a/output/policyengine_taxsim_output.csv b/output/policyengine_taxsim_output.csv deleted file mode 100644 index 9f5171d..0000000 --- a/output/policyengine_taxsim_output.csv +++ /dev/null @@ -1,3 +0,0 @@ -taxsimid,year,state,mstat,page,sage,fiitax,siitax,fica -999,2021,3,1,40,0,2775.0,1008.87,3748.5 -11,2021,3,1,40,0,2535.0,942.07,3595.5 diff --git a/output/taxsim35_output.csv b/output/taxsim35_output.csv deleted file mode 100644 index f9c17bb..0000000 --- a/output/taxsim35_output.csv +++ /dev/null @@ -1,3 +0,0 @@ -taxsimid,year,state,fiitax,siitax,fica,frate,srate,ficar,tfica,credits,v10,v11,v12,v13,v14,v15,v16,v17,v18,v19,v20, v21,v22,v23,v24,v25,v26,v27,v28,v29,v30,v31,v32,v33,v34, v35,v36,v37,v38,v39,v40,v41,staxbc,v42,v43,v44,v45 -999.,2021,3,2775.00,1008.87,7497.00,12.00,3.34,15.30,3748.50,.00,49000.00,.00,.00,12550.00,.00,.00,.00,.00,36450.00,4175.00,.00,.00,.00,.00,.00,.00,49000.00,.00,4175.00,7497.00,49000.01,.00,49000.01,.00,12550.00,1008.87,36450.01,.00,.00,.00,.00,3.34,.00,.00,.00,.00,1400.00 -11.,2021,3,2535.00,942.07,7191.00,12.00,3.34,15.30,3595.50,.00,47000.00,.00,.00,12550.00,.00,.00,.00,.00,34450.00,3935.00,.00,.00,.00,.00,.00,.00,47000.00,.00,3935.00,7191.00,47001.01,.00,47000.01,.00,12550.00,942.07,34450.01,.00,.00,.00,.00,3.34,.00,.00,.00,.00,1400.00 diff --git a/policyengine_taxsim/core/input_mapper.py b/policyengine_taxsim/core/input_mapper.py index a19e7b9..fc06b64 100644 --- a/policyengine_taxsim/core/input_mapper.py +++ b/policyengine_taxsim/core/input_mapper.py @@ -19,7 +19,7 @@ def import_single_household(taxsim_vars): year = str(int(taxsim_vars["year"])) # Ensure year is an integer string if "state" not in taxsim_vars: # If state is not provided set it to AL as default state - taxsim_vars["state"] = 1 + taxsim_vars["state"] = 2 state = get_state_code(taxsim_vars["state"]) diff --git a/saved_tests.pickle b/saved_tests.pickle deleted file mode 100644 index a85ef2d..0000000 Binary files a/saved_tests.pickle and /dev/null differ diff --git a/taxsim_emulator.py b/taxsim_emulator.py deleted file mode 100644 index 393dd69..0000000 --- a/taxsim_emulator.py +++ /dev/null @@ -1,402 +0,0 @@ -from policyengine_us import Simulation -from policyengine_us import parameters -from policyengine_us.model_api import * - -import pandas as pd - -import argparse - -from TaxsimInputReader import InputReader - - -def read_input_file(input_file): - reader = InputReader(input_file) - return reader - - -def get_situations(reader): - return reader.situations - - -def get_output_level(reader): - return reader.output_level - - -def get_variables(output_level): - if output_level == "standard": - return standard_variables - elif output_level == "full": - return full_variables - elif output_level == "text_descriptions": - raise ValueError( - "Emulator does not support the text description option yet. Please use standard or full output levels") - - -# input a list of situations and convert each situation into a simulation object -def make_simulation(list_of_households): - list_of_simulations = [] - for situation in list_of_households: - list_of_simulations.append(Simulation(situation=situation, )) - return list_of_simulations - - -def convert_to_number(arr): - # Access the element (assuming it's a single-element array) - value = arr[0] - # Round to two decimal places - rounded_value = round(value, 2) - # Format as string with two decimal places and trailing zeros - return f"{rounded_value:.2f}" - - -# Return true if the string is a date -def is_date(string): - try: - pd.to_datetime(string, format='%Y') - return True - except Exception: - return False - - -# Return true if the string can create a StateCode instance -def is_state_code(string): - try: - StateCode[string] - return True - except Exception: - return False - - -# Get the user's state -def get_state(situation): - year_and_state = list(situation["households"]["your household"]["state_name"].items()) - for item in year_and_state: - for string in item: - if is_state_code(string): - return string - - -# Get the tax filing year -def get_year(situation): - year_and_state = list(situation["households"]["your household"]["state_name"].items()) - for item in year_and_state: - for string in item: - if is_date(string): - return string - - -# Get the state SOI code for the state output column -def get_state_code(situation): - state = get_state(situation) - state_code_mapping = { - "AL": 1, "AK": 2, "AZ": 3, "AR": 4, "CA": 5, "CO": 6, "CT": 7, "DE": 8, "DC": 9, "FL": 10, - "GA": 11, "HI": 12, "ID": 13, "IL": 14, "IN": 15, "IA": 16, "KS": 17, "KY": 18, "LA": 19, - "ME": 20, "MD": 21, "MA": 22, "MI": 23, "MN": 24, "MS": 25, "MO": 26, "MT": 27, "NE": 28, - "NV": 29, "NH": 30, "NJ": 31, "NM": 32, "NY": 33, "NC": 34, "ND": 35, "OH": 36, "OK": 37, - "OR": 38, "PA": 39, "RI": 40, "SC": 41, "SD": 42, "TN": 43, "TX": 44, "UT": 45, "VT": 46, - "VA": 47, "WA": 48, "WV": 49, "WI": 50, "WY": 51 - } - return state_code_mapping.get(state) - - -# Returns the itemized_deduction function for the user's state -def state_itemized_deductions(situation): - state = get_state(situation).lower() - return state + "_itemized_deductions" - - -# Returns the standard_deduction function for the user's state -def state_standard_deduction(situation): - state = get_state(situation).lower() - return state + "_standard_deduction" - - -# Returns the function that computes Child and Dependent Care Credit for the user's state -def state_child_care_credit(situation): - state = get_state(situation).lower() - return state + "_cdcc" - - -# Returns the function that computes the user's AGI based on filing state -def state_adjusted_gross_income(situation): - state = get_state(situation).lower() - return state + "_agi" - - -# Returns the function that computes the user's state taxable income -def state_taxable_income(situation): - state = get_state(situation).lower() - return state + "_taxable_income" - - -# Returns the function that computes state income tax -def state_income_tax(situation): - state = get_state(situation).lower() - return state + "_income_tax" - - -# Return the function that computes total state exemptions -def state_exemptions(situation): - state = get_state(situation).lower() - # try to calculate state_exemption, if error, return 0 --> NEED TO ADD Feature - return state + "_exemptions" - - -def state_agi(situation): - state = get_state(situation).lower() - return state + "_agi" - - -def placeholder(situation): - return "placeholder" - - -def property_tax_credit(situation): - state = get_state(situation).lower() - return state + "_property_tax_credit" - - -def child_care_credit(situation): - state = get_state(situation).lower() - if state == 'me': - return state + '_child_care_credit' - else: - return 'placeholder' - - -def get_fica(situation): - simulation = Simulation(situation=situation, ) - employee_social_security_tax = simulation.calculate(variable_name="employee_social_security_tax") - employee_medicare_tax = simulation.calculate(variable_name="employee_medicare_tax") - additional_medicare_tax = simulation.calculate(variable_name="additional_medicare_tax") - return employee_social_security_tax + employee_medicare_tax + additional_medicare_tax - - -def get_mtr(household, mtr_type, _simulation): - simulation = Simulation(situation=household) - simulation_with_earnings_rise = Simulation(situation=household) - year = get_year(household) - - earnings = simulation_with_earnings_rise.calculate("employment_income", year) - new_earnings = earnings + 1 - simulation_with_earnings_rise.set_input("employment_income", year, new_earnings) - fed_income_tax_original = simulation.calculate("income_tax", year) - fed_income_tax_after_rise = simulation_with_earnings_rise.calculate("income_tax", year) - income_tax_rise = fed_income_tax_after_rise - fed_income_tax_original - federal_mtr = income_tax_rise / 1 - - state_tax_original = simulation.calculate("state_income_tax", year) - state_tax_after_rise = simulation_with_earnings_rise.calculate("state_income_tax", year) - state_tax_rise = state_tax_after_rise - state_tax_original - state_mtr = state_tax_rise / 1 - - if mtr_type == "federal_mtr": - return np.array(federal_mtr * 100) - elif mtr_type == "state_mtr": - return np.array(state_mtr * 100) - else: - return np.array(0) - - -# List of variables that aren't mapped in Policy Engine -placeholder_variables = ["fica", "frate", "srate", "ficar", "tfica", "exemption_phaseout", "deduction_phaseout", - "income_tax19", "exemption_surtax", "general_tax_credit", "FICA", "state_rent_expense", - "state_property_tax_credit", "state_eic", "state_total_credits", "state_bracket_rate", - "state_exemptions", "state_cdcc"] - -# list of variables that match Taxsim output variables -variables = ["get_year", "get_state", "income_tax", "state_income_tax", "fica", "frate", "srate", "ficar", "tfica", - "adjusted_gross_income", "tax_unit_taxable_unemployment_compensation", "tax_unit_taxable_social_security", - "basic_standard_deduction", "exemptions", "exemption_phaseout", "deduction_phaseout", - "taxable_income_deductions", - "taxable_income", "income_tax19", "exemption_surtax", "general_tax_credit", "ctc", "refundable_ctc", - "cdcc", - "eitc", "amt_income", "alternative_minimum_tax", "income_tax_before_refundable_credits", "FICA", - "household_net_income", - "state_rent_expense", "state_agi", "state_exemptions", "state_standard_deduction", - "state_itemized_deductions", - "state_taxable_income", "state_property_tax_credit", "state_child_care_credit", "state_eic", - "state_total_credits", - "state_bracket_rate", "self_employment_income", "net_investment_income_tax", "employee_medicare_tax", - "rrc_cares"] - -# list of dictionaries where each Policy Engine variable is mapped to the Taxsim name. -# Booleans indicate whether the variable is a placeholder, a local variable, or a local variable that doesn't return a function (only get_year and state) -# list of variables mapped to taxsim "2" input (full variables) - - -full_variables = [ - {'taxsim_name': 'year', 'calculation': 'get_year'}, - {'taxsim_name': 'state', 'calculation': 'get_state_code'}, - {'taxsim_name': 'fiitax', 'calculation': 'income_tax'}, - {'taxsim_name': 'siitax', 'calculation': lambda household: globals()['state_income_tax'](household)}, - {'taxsim_name': 'fica', 'calculation': 'get_fica'}, - {'taxsim_name': 'frate', 'calculation': 'federal_mtr'}, - {'taxsim_name': 'srate', 'calculation': 'state_mtr'}, - {'taxsim_name': 'ficar', 'calculation': 'placeholder'}, - {'taxsim_name': 'tfica', 'calculation': 'taxsim_tfica'}, - {'taxsim_name': 'v10', 'calculation': 'adjusted_gross_income'}, - {'taxsim_name': 'v11', 'calculation': 'tax_unit_taxable_unemployment_compensation'}, - {'taxsim_name': 'v12', 'calculation': 'tax_unit_taxable_social_security'}, - {'taxsim_name': 'v13', 'calculation': 'basic_standard_deduction'}, - {'taxsim_name': 'v14', 'calculation': 'exemptions'}, - {'taxsim_name': 'v15', 'calculation': 'placeholder'}, - {'taxsim_name': 'v16', 'calculation': 'placeholder'}, - {'taxsim_name': 'v17', 'calculation': 'taxable_income_deductions'}, - {'taxsim_name': 'v18', 'calculation': 'taxable_income'}, - {'taxsim_name': 'v19', 'calculation': 'income_tax'}, - {'taxsim_name': 'v20', 'calculation': 'placeholder'}, - {'taxsim_name': 'v21', 'calculation': 'placeholder'}, - {'taxsim_name': 'v22', 'calculation': 'ctc'}, - {'taxsim_name': 'v23', 'calculation': 'refundable_ctc'}, - {'taxsim_name': 'v24', 'calculation': 'cdcc'}, - {'taxsim_name': 'v25', 'calculation': 'eitc'}, - {'taxsim_name': 'v26', 'calculation': 'amt_income'}, - {'taxsim_name': 'v27', 'calculation': 'alternative_minimum_tax'}, - {'taxsim_name': 'v28', 'calculation': 'income_tax_before_refundable_credits'}, - {'taxsim_name': 'v29', 'calculation': 'placeholder'}, - {'taxsim_name': 'v30', 'calculation': 'household_net_income'}, - {'taxsim_name': 'v31', 'calculation': 'placeholder'}, - {'taxsim_name': 'v32', 'calculation': lambda household: globals()['state_agi'](household)},#this may not work for some of the states - {'taxsim_name': 'v33', 'calculation': 'placeholder'}, - {'taxsim_name': 'v34', 'calculation': lambda household: globals()['state_standard_deduction'](household)}, - {'taxsim_name': 'v35', 'calculation': lambda household: globals()['state_itemized_deductions'](household)}, - {'taxsim_name': 'v36', 'calculation': lambda household: globals()['state_taxable_income'](household)}, - {'taxsim_name': 'v37', 'calculation': lambda household: globals()['property_tax_credit'](household)}, - {'taxsim_name': 'v38', 'calculation': 'child_care_credit'}, - {'taxsim_name': 'v39', 'calculation': 'placeholder'}, - {'taxsim_name': 'v40', 'calculation': 'placeholder'}, - {'taxsim_name': 'v41', 'calculation': 'placeholder'}, - {'taxsim_name': 'v42', 'calculation': 'self_employment_income'}, - {'taxsim_name': 'v43', 'calculation': 'net_investment_income_tax'}, - {'taxsim_name': 'v44', 'calculation': 'employee_medicare_tax'}, - {'taxsim_name': 'v45', 'calculation': 'rrc_cares'} -] - -# variables mapped to taxsim "0" input (standard) -standard_variables = [ - {'taxsim_name': 'year', 'calculation': 'get_year'}, - {'taxsim_name': 'state', 'calculation': 'get_state'}, - {'taxsim_name': 'fiitax', 'calculation': 'income_tax'}, - {'taxsim_name': 'siitax', 'calculation': lambda household: globals()['state_income_tax'](household)}, - {'taxsim_name': 'fica', 'calculation': 'placeholder'}, - {'taxsim_name': 'frate', 'calculation': 'placeholder'}, - {'taxsim_name': 'srate', 'calculation': 'placeholder'}, - {'taxsim_name': 'ficar', 'calculation': 'placeholder'}, - {'taxsim_name': 'tfica', 'calculation': 'placeholder'} -] - - -# Calculate the variables based on the user's information and save them to a dataframe -# input a list of simulations, a list of households, and a variable_dict. -# variable dict will be switched to either 0, 2, 5 to correspond with taxsim inputs --> to be implemented - - -# separate iteration into one single household output -def single_household(household, variable_dict): - row = [] - - simulation = Simulation(situation=household, ) - year = get_year(situation=household) - simulation.calculate('adjusted_gross_income', period=year) - - for variable_info in variable_dict: - calculation = variable_info['calculation'] - # check that the string isn't a local function. If it is, assign the result of the local function to result - if calculation in ['get_year', 'get_state_code', 'placeholder']: - function = globals()[calculation] - result = function(household) - # if calculation field is a string, it is a policy engine function, so use the simulation to calculate - elif isinstance(calculation, str): - if calculation == 'adjusted_gross_income': - # simulation.trace = True - year = get_year(situation=household) - result = simulation.calculate(variable_name=calculation, period=year) - result = convert_to_number(result) - # print(simulation.tracer.print_computation_log()) - elif calculation == 'get_fica': - result = get_fica(household) - result = convert_to_number(result) - elif calculation == 'federal_mtr': - result = get_mtr(household=household, mtr_type=calculation, _simulation=simulation) - result = convert_to_number(result) - elif calculation == 'state_mtr': - result = get_mtr(household=household, mtr_type=calculation, _simulation=simulation) - result = convert_to_number(result) - elif calculation == 'child_care_credit': - result = child_care_credit(household) - if result != 'placeholder': - result = simulation.calculate(calculation,period=year) - result = convert_to_number(result) - else: - result = simulation.calculate(calculation,period=year) - result = convert_to_number(result) - # if calculation is not a string, it is a local function that returns the name of a policy engine function - # take the result of calculation, input it to the simulation.calculate, and assign the result to result - else: - func = calculation(household) - result = simulation.calculate(func,period=year) - result = convert_to_number(result) - - row.append(result) - - return row - - -# second function that calls the single household for each in the list -def multiple_households(list_of_households, variable_dict): - output = [] - - list_of_simulations = make_simulation(list_of_households) - - for simulation, household in zip(list_of_simulations, list_of_households): - row = single_household(household, variable_dict) - output.append(row) - - # Create DataFrame from the output with taxsim_names as columns - return output - - -def make_dataframe(input_file, variable_dict, is_multiple_households: bool): - if not is_multiple_households: - household = input_file[0] - output = [single_household(household, variable_dict)] - df = pd.DataFrame(output, columns=[var['taxsim_name'] for var in variable_dict], - index=pd.RangeIndex(start=1, stop=len(output) + 1, name='taxsimid')) - return df - else: - output = multiple_households(input_file, variable_dict) - df = pd.DataFrame(output, columns=[var['taxsim_name'] for var in variable_dict], - index=pd.RangeIndex(start=1, stop=len(output) + 1, name='taxsimid')) - return df - - -# return true if the input file contains more than one household -def is_multiple_households(list): - return len(list) > 1 - - -# run main with an input file to execute the methods in the correct order -def main(input_file): - print("running script") - - reader = read_input_file(input_file) - list_of_households = get_situations(reader) - output_level = get_output_level(reader) - variable_dict = get_variables(output_level) - - print("Chosen Output Level: " + output_level) - - output = make_dataframe(list_of_households, variable_dict, is_multiple_households(list_of_households)) - - output.to_csv('output.csv', index=True) - - print("script finished") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Process input file and generate output.') - parser.add_argument('input_file', type=str, help='Path to the input CSV file') - args = parser.parse_args() - - main(args.input_file)