Skip to content

Commit

Permalink
Merge in recent changes on master branch
Browse files Browse the repository at this point in the history
  • Loading branch information
martinholmer committed Sep 25, 2024
2 parents 15d1de8 + c698a38 commit e897b15
Show file tree
Hide file tree
Showing 10 changed files with 70 additions and 140 deletions.
2 changes: 0 additions & 2 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,3 @@ include taxcalc/policy_current_law.json
include taxcalc/puf_weights.csv.gz
include taxcalc/puf_ratios.csv
include taxcalc/records_variables.json
include taxcalc/tmd_weights.csv.gz
include taxcalc/tmd_growfactors.csv
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ help:
@echo "clean : remove .pyc files and local taxcalc package"
@echo "package : build and install local package"
@echo "pytest-cps : generate report for and cleanup after"
@echo " pytest -m 'not requires_pufcsv and not requires_tmdcsv and not pre_release'"
@echo " pytest -m 'not requires_pufcsv and not pre_release'"
@echo "pytest : generate report for and cleanup after"
@echo " pytest -m 'not pre_release'"
@echo "pytest-all : generate report for and cleanup after"
Expand Down Expand Up @@ -51,7 +51,7 @@ endef
.PHONY=pytest-cps
pytest-cps:
@$(pytest-setup)
@cd taxcalc ; pytest -n4 --disable-warnings --durations=0 --durations-min=2 -m "not requires_pufcsv and not requires_tmdcsv and not pre_release"
@cd taxcalc ; pytest -n4 --disable-warnings --durations=0 --durations-min=2 -m "not requires_pufcsv and not pre_release"
@$(pytest-cleanup)

.PHONY=pytest
Expand Down Expand Up @@ -103,7 +103,7 @@ define coverage-cleanup
rm -f .coverage htmlcov/*
endef

COVMARK = "not requires_pufcsv and not requires_tmdcsv and not pre_release"
COVMARK = "not requires_pufcsv and not pre_release"

OS := $(shell uname -s)

Expand Down
13 changes: 8 additions & 5 deletions docs/usage/data.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,16 @@ file.

The [tax-microdata
repository](https://github.com/PSLmodels/tax-microdata-benchmarking)
produces an input variables file (`tmd.csv`) and a
`tmd_weights.csv.gz` file that is included in the Tax-Calculator
produces an input variables file (`tmd.csv`), a national weights file
(`tmd_weights.csv.gz`), and a variable growth factors file
(`tmd_growfactors.csv`) that can be used with the Tax-Calculator
package beginning with the 3.6.0 release. The `tmd.csv` file is
available only to Tax-Calculator users who have purchased their own
version of the 2015 IRS-SOI PUF. For those users, the
`Records.tmd_constructor()` method creates a `Records` class object
containing the `tmd` variables and weights.
version of the 2015 IRS-SOI PUF. For those users, those three files
are available from the tax-microdata repository. These three tmd files
can be used with the Tax-Calculator Python API (using the
`Records.tmd_constructor()` static method) or with the Tax-Calculator
CLI tool, `tc`.

## Using other data with Tax-Calculator

Expand Down
1 change: 0 additions & 1 deletion pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ testpaths =
taxcalc
markers =
requires_pufcsv
requires_tmdcsv
pre_release
compatible_data
local
Expand Down
3 changes: 0 additions & 3 deletions taxcalc.egg-info/SOURCES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,6 @@ taxcalc/puf_weights.csv.gz
taxcalc/records.py
taxcalc/records_variables.json
taxcalc/taxcalcio.py
taxcalc/tmd_growfactors.csv
taxcalc/tmd_weights.csv.gz
taxcalc/utils.py
taxcalc/utilsprvt.py
taxcalc.egg-info/PKG-INFO
Expand Down Expand Up @@ -214,7 +212,6 @@ taxcalc/tests/test_records.py
taxcalc/tests/test_reforms.py
taxcalc/tests/test_responses.py
taxcalc/tests/test_taxcalcio.py
taxcalc/tests/test_tmdcsv.py
taxcalc/tests/test_utils.py
taxcalc/validation/CSV_INPUT_VARS.md
taxcalc/validation/CSV_OUTPUT_VARS.md
Expand Down
35 changes: 20 additions & 15 deletions taxcalc/records.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
# pylint --disable=locally-disabled records.py

import os
from pathlib import Path
import numpy as np
import pandas as pd
from taxcalc.data import Data
Expand Down Expand Up @@ -116,9 +117,6 @@ class instance: Records
PUF_RATIOS_FILENAME = 'puf_ratios.csv'
CPS_WEIGHTS_FILENAME = 'cps_weights.csv.gz'
CPS_RATIOS_FILENAME = None
TMD_WEIGHTS_FILENAME = 'tmd_weights.csv.gz'
TMD_GROWFACTORS_FILENAME = 'tmd_growfactors.csv'
TMD_RATIOS_FILENAME = None
CODE_PATH = os.path.abspath(os.path.dirname(__file__))
VARINFO_FILE_NAME = 'records_variables.json'
VARINFO_FILE_PATH = CODE_PATH
Expand Down Expand Up @@ -226,9 +224,12 @@ def cps_constructor(data=None,
exact_calculations=exact_calculations)

@staticmethod
def tmd_constructor(data, # path to tmd.csv file or dataframe
gfactors=GrowFactors(TMD_GROWFACTORS_FILENAME),
exact_calculations=False): # pragma: no cover
def tmd_constructor(
data_path: Path,
weights_path: Path,
growfactors_path: Path,
exact_calculations=False
): # pragma: no cover
"""
Static method returns a Records object instantiated with TMD
input data. This works in an analogous way to Records(), which
Expand All @@ -239,14 +240,18 @@ def tmd_constructor(data, # path to tmd.csv file or dataframe
eliminate the need to specify all the details of the PUF input
data.
"""
weights = os.path.join(Records.CODE_PATH, Records.TMD_WEIGHTS_FILENAME)
return Records(data=data,
start_year=Records.TMDCSV_YEAR,
gfactors=gfactors,
weights=weights,
adjust_ratios=Records.TMD_RATIOS_FILENAME,
exact_calculations=exact_calculations)

assert isinstance(data_path, Path)
assert isinstance(weights_path, Path)
assert isinstance(growfactors_path, Path)
return Records(
data=pd.read_csv(data_path),
start_year=Records.TMDCSV_YEAR,
weights=str(weights_path),
gfactors=GrowFactors(growfactors_filename=str(growfactors_path)),
adjust_ratios=None,
exact_calculations=exact_calculations,
)

def increment_year(self):
"""
Add one to current year, and also does
Expand Down Expand Up @@ -277,7 +282,7 @@ def _extrapolate(self, year):
"""
# pylint: disable=too-many-statements,no-member
# put values in local dictionary
gfv = dict()
gfv = {}
for name in GrowFactors.VALID_NAMES:
gfv[name] = self.gfactors.factor_value(name, year)
# apply values to Records variables
Expand Down
57 changes: 39 additions & 18 deletions taxcalc/taxcalcio.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ def __init__(self, input_data, tax_year, baseline, reform, assump,
self.puf_input_data = False
self.cps_input_data = False
self.tmd_input_data = False
self.tmd_weights = None
self.tmd_gfactor = None
if isinstance(input_data, str):
# remove any leading directory path from INPUT filename
fname = os.path.basename(input_data)
Expand All @@ -90,6 +92,23 @@ def __init__(self, input_data, tax_year, baseline, reform, assump,
if not self.cps_input_data and not os.path.isfile(input_data):
msg = 'INPUT file could not be found'
self.errmsg += 'ERROR: {}\n'.format(msg)
# if tmd_input_data is True, construct weights and gfactor paths
if self.tmd_input_data: # pragma: no cover
tmd_dir = os.path.dirname(input_data)
if 'TMD_AREA' in os.environ:
area = os.environ['TMD_AREA']
wfile = f'{area}_tmd_weights.csv.gz'
inp = f'{fname[:-4]}_{area}-{str(tax_year)[2:]}'
else: # using national weights
wfile = 'tmd_weights.csv.gz'
self.tmd_weights = os.path.join(tmd_dir, wfile)
self.tmd_gfactor = os.path.join(tmd_dir, 'tmd_growfactors.csv')
if not os.path.isfile(self.tmd_weights):
msg = f'weights file {self.tmd_weights} could not be found'
self.errmsg += 'ERROR: {}\n'.format(msg)
if not os.path.isfile(self.tmd_gfactor):
msg = f'gfactor file {self.tmd_gfactor} could not be found'
self.errmsg += 'ERROR: {}\n'.format(msg)
elif isinstance(input_data, pd.DataFrame):
inp = 'df-{}'.format(str(tax_year)[2:])
else:
Expand Down Expand Up @@ -123,7 +142,7 @@ def __init__(self, input_data, tax_year, baseline, reform, assump,
elif isinstance(reform, str):
self.specified_reform = True
# split any compound reform into list of simple reforms
refnames = list()
refnames = []
reforms = reform.split('+')
for rfm in reforms:
# remove any leading directory path from rfm filename
Expand Down Expand Up @@ -206,7 +225,7 @@ def __init__(self, input_data, tax_year, baseline, reform, assump,
self.calc = None
self.calc_base = None
self.param_dict = None
self.policy_dicts = list()
self.policy_dicts = []

def init(self, input_data, tax_year, baseline, reform, assump,
aging_input_data, exact_calculations):
Expand Down Expand Up @@ -234,7 +253,7 @@ def init(self, input_data, tax_year, baseline, reform, assump,
# get assumption sub-dictionaries
paramdict = Calculator.read_json_param_objects(None, assump)
# get policy parameter dictionaries from --reform file(s)
policydicts = list()
policydicts = []
if self.specified_reform:
reforms = reform.split('+')
for ref in reforms:
Expand All @@ -252,9 +271,7 @@ def init(self, input_data, tax_year, baseline, reform, assump,
self.errmsg += valerr_msg.__str__()
# create GrowFactors base object that incorporates gdiff_baseline
if self.tmd_input_data:
gfactors_base = GrowFactors( # pragma: no cover
Records.TMD_GROWFACTORS_FILENAME
)
gfactors_base = GrowFactors(self.tmd_gfactor) # pragma: no cover
else:
gfactors_base = GrowFactors()
gdiff_baseline.apply_to(gfactors_base)
Expand All @@ -266,9 +283,7 @@ def init(self, input_data, tax_year, baseline, reform, assump,
self.errmsg += valerr_msg.__str__()
# create GrowFactors ref object that has all gdiff objects applied
if self.tmd_input_data:
gfactors_ref = GrowFactors( # pragma: no cover
Records.TMD_GROWFACTORS_FILENAME
)
gfactors_ref = GrowFactors(self.tmd_gfactor) # pragma: no cover
else:
gfactors_ref = GrowFactors()
gdiff_baseline.apply_to(gfactors_ref)
Expand Down Expand Up @@ -333,14 +348,20 @@ def init(self, input_data, tax_year, baseline, reform, assump,
exact_calculations=exact_calculations
)
elif self.tmd_input_data:
recs = Records.tmd_constructor(
data=input_data,
recs = Records(
data=pd.read_csv(input_data),
start_year=Records.TMDCSV_YEAR,
weights=self.tmd_weights,
gfactors=gfactors_ref,
adjust_ratios=None,
exact_calculations=exact_calculations
) # pragma: no cover
recs_base = Records.tmd_constructor(
data=input_data,
recs_base = Records(
data=pd.read_csv(input_data),
start_year=Records.TMDCSV_YEAR,
weights=self.tmd_weights,
gfactors=gfactors_base,
adjust_ratios=None,
exact_calculations=exact_calculations
) # pragma: no cover
else: # if not {cps|tmd}_input_data but aging_input_data
Expand Down Expand Up @@ -541,7 +562,7 @@ def write_doc_file(self):
doc = Calculator.reform_documentation(self.param_dict,
self.policy_dicts[1:])
doc_fname = self._output_filename.replace('.csv', '-doc.text')
with open(doc_fname, 'w') as dfile:
with open(doc_fname, 'w', encoding='utf-8') as dfile:
dfile.write(doc)

def write_sqldb_file(self, dump_varset, mtr_paytax, mtr_inctax,
Expand Down Expand Up @@ -575,7 +596,7 @@ def write_tables_file(self):
tab_fname = self._output_filename.replace('.csv', '-tab.text')
# skip tables if there are not some positive weights
if self.calc_base.total_weight() <= 0.:
with open(tab_fname, 'w') as tfile:
with open(tab_fname, 'w', encoding='utf-8') as tfile:
msg = 'No tables because sum of weights is not positive\n'
tfile.write(msg)
return
Expand All @@ -597,7 +618,7 @@ def write_tables_file(self):
diff = nontax + change # using expanded_income under baseline policy
diffdf = pd.DataFrame(data=np.column_stack(diff), columns=all_vars)
# write each kind of distributional table
with open(tab_fname, 'w') as tfile:
with open(tab_fname, 'w', encoding='utf-8') as tfile:
TaxCalcIO.write_decile_table(distdf, tfile, tkind='Reform Totals')
tfile.write('\n')
TaxCalcIO.write_decile_table(diffdf, tfile, tkind='Differences')
Expand Down Expand Up @@ -730,15 +751,15 @@ def write_empty_graph_file(fname, title, reason):
'<head><title>{}</title></head>\n'
'<body><center<h1>{}</h1></center></body>\n'
'</html>\n').format(title, reason)
with open(fname, 'w') as gfile:
with open(fname, 'w', encoding='utf-8') as gfile:
gfile.write(txt)

def minimal_output(self):
"""
Extract minimal output and return it as Pandas DataFrame.
"""
varlist = ['RECID', 'YEAR', 'WEIGHT', 'INCTAX', 'LSTAX', 'PAYTAX']
odict = dict()
odict = {}
scalc = self.calc
odict['RECID'] = scalc.array('RECID') # id for tax filing unit
odict['YEAR'] = self.tax_year() # tax calculation year
Expand Down
38 changes: 0 additions & 38 deletions taxcalc/tests/test_tmdcsv.py

This file was deleted.

Loading

0 comments on commit e897b15

Please sign in to comment.