diff --git a/CHANGELOG.md b/CHANGELOG.md index 8fa3c413..bfd72c1a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +### 3.0.0 [#241](https://github.com/openfisca/openfisca-france-data/pull/241) +- Breaking changes + +Adapte le dépôt au passage à openfisca-survey-manager 2.0.0 qui constitue une refactorisation de l'objet survey-scenario et des simulations qui sont dedans. Cela concerne donc les parties de ce dépôt qui héritent d'objets d'openfisca-survey-manager : +- `openfisca_france_data/aggregates.py` +- `openfisca_france_data/surveys.py` +Les autres modifications sont des adaptations syntaxiques mineures du fait de cette adaptation + ### 2.0.7 [#239](https://github.com/openfisca/openfisca-france-data/pull/239/files) * New features - Ajoute des nouveaux agrégats pour FranceAggregates diff --git a/openfisca_france_data/__init__.py b/openfisca_france_data/__init__.py index 81ef2499..43389933 100644 --- a/openfisca_france_data/__init__.py +++ b/openfisca_france_data/__init__.py @@ -1,10 +1,12 @@ import inspect +from importlib import metadata import logging import os -import pkg_resources import pandas +from pathlib import Path from openfisca_core import reforms # type: ignore +from openfisca_core.errors import VariableNameConflictError import openfisca_france # type: ignore @@ -13,6 +15,9 @@ from openfisca_france_data.model.base import * # noqa analysis:ignore +openfisca_france_data_location = Path(__file__).parent.parent + + log = logging.getLogger(__name__) @@ -141,7 +146,8 @@ def apply(self): continue try: self.add_variable(variable) - except AttributeError: + except VariableNameConflictError: + # log.debug(f"{variable.__name__} has been updated in openfisca-france-data") self.update_variable(variable) @@ -206,7 +212,7 @@ def apply(self): COUNTRY_DIR = os.path.dirname(os.path.abspath(__file__)) DATA_DIR = os.path.join( - pkg_resources.get_distribution('openfisca-france-data').location, + openfisca_france_data_location, 
'openfisca_france_data', 'plugins', 'aggregates', diff --git a/openfisca_france_data/aggregates.py b/openfisca_france_data/aggregates.py index ae47df5d..f46a5ebc 100644 --- a/openfisca_france_data/aggregates.py +++ b/openfisca_france_data/aggregates.py @@ -3,14 +3,12 @@ import json from pathlib import Path -import numpy as np -import pandas as pd -import pkg_resources import os from datetime import datetime +import pandas as pd from openfisca_survey_manager.aggregates import AbstractAggregates -from openfisca_france_data import AGGREGATES_DEFAULT_VARS # type: ignore +from openfisca_france_data import openfisca_france_data_location, AGGREGATES_DEFAULT_VARS # type: ignore log = logging.getLogger(__name__) @@ -38,14 +36,14 @@ def __init__(self, survey_scenario = None, target_source = None): super().__init__(survey_scenario = survey_scenario) self.target_source = target_source - def load_actual_data(self, year = None): + def load_actual_data(self, period = None): target_source = self.target_source assert target_source in ["ines", "taxipp", "france_entiere"], "les options possible pour source_cible sont ines, taxipp ou france_entiere" - assert year is not None + assert period is not None if target_source == "taxipp": taxipp_aggregates_file = Path( - pkg_resources.get_distribution("openfisca-france_data").location, + openfisca_france_data_location, "openfisca_france_data", "assets", "aggregats", @@ -62,15 +60,15 @@ def load_actual_data(self, year = None): .rename(columns = {"unnamed: 0": "description"}) .dropna(subset = ["annee 2019", "annee 2018", "annee 2017", "annee 2016"], how = "all") ) - if f"annee {year}" not in df: + if f"annee {period}" not in df: return df = ( - df[["variable_openfisca", f"annee {year}"]] + df[["variable_openfisca", f"annee {period}"]] .dropna() .rename(columns = { "variable_openfisca": "variable", - f"annee {year}": year, + f"annee {period}": period, }) ) @@ -78,25 +76,25 @@ def load_actual_data(self, year = None): 
df.loc[df.variable.str.startswith("nombre")] .set_index("variable") .rename(index = lambda x : x.replace("nombre_", "")) - .rename(columns = {year: "actual_beneficiaries"}) + .rename(columns = {period: "actual_beneficiaries"}) ) / self.beneficiaries_unit amounts = ( df.loc[~df.variable.str.startswith("nombre")] .set_index("variable") - .rename(columns = {year: "actual_amount"}) + .rename(columns = {period: "actual_amount"}) ) / self.amount_unit result = amounts.merge(beneficiaries, on = "variable", how = "outer").drop("PAS SIMULE") elif target_source == "ines": ines_aggregates_file = Path( - pkg_resources.get_distribution("openfisca-france_data").location, + openfisca_france_data_location, "openfisca_france_data", "assets", "aggregats", "ines", - f"ines_{year}.json" + f"ines_{period}.json" ) with open(ines_aggregates_file, 'r') as f: @@ -110,12 +108,12 @@ def load_actual_data(self, year = None): elif target_source == "france_entiere": ines_aggregates_file = Path( - pkg_resources.get_distribution("openfisca-france_data").location, + openfisca_france_data_location, "openfisca_france_data", "assets", "aggregats", "france_entiere", - f"france_entiere_{year}.json" + f"france_entiere_{period}.json" ) with open(ines_aggregates_file, 'r') as f: @@ -123,12 +121,16 @@ def load_actual_data(self, year = None): result = pd.DataFrame(data['data']).drop(['source'], axis = 1) result['actual_beneficiaries'] = result. actual_beneficiaries / self.beneficiaries_unit - result['actual_amount'] = result. 
actual_amount / self.amount_unit + result['actual_amount'] = result.actual_amount / self.amount_unit - result = result[["variable","actual_amount","actual_beneficiaries"]].set_index("variable") + result = result[[ + "variable", + "actual_amount", + "actual_beneficiaries", + ]].set_index("variable") return result - + def to_csv(self, path = None, absolute = True, amount = True, beneficiaries = True, default = 'actual', relative = True, target = "reform"): """Saves the table to csv.""" @@ -136,7 +138,7 @@ def to_csv(self, path = None, absolute = True, amount = True, beneficiaries = Tr if os.path.isdir(path): now = datetime.now() - file_path = os.path.join(path, 'Aggregates_%s_%s_%s.%s' % (self.target_source,self.year,now.strftime('%d-%m-%Y'), "csv")) + file_path = os.path.join(path, 'Aggregates_%s_%s_%s.%s' % (self.target_source, self.period, now.strftime('%d-%m-%Y'), "csv")) else: file_path = path diff --git a/openfisca_france_data/erfs/input_data_builder/step_08_final.py b/openfisca_france_data/erfs/input_data_builder/step_08_final.py index 3ff828a7..4f7c160c 100644 --- a/openfisca_france_data/erfs/input_data_builder/step_08_final.py +++ b/openfisca_france_data/erfs/input_data_builder/step_08_final.py @@ -10,6 +10,7 @@ from openfisca_survey_manager.temporary import temporary_store_decorator +from openfisca_france_data import openfisca_france_data_location from openfisca_france_data.utils import ( check_structure, control, @@ -20,6 +21,7 @@ set_variables_default_value, ) + log = logging.getLogger(__name__) @@ -202,8 +204,6 @@ def final(temporary_store = None, year = None, check = True): print_id(final2) # # TODO: merging with patrimoine log.info(' traitement des zones apl') - import pkg_resources - openfisca_france_data_location = pkg_resources.get_distribution('openfisca-france-data').location zone_apl_imputation_data_file_path = os.path.join( openfisca_france_data_location, 'openfisca_france_data', diff --git a/openfisca_france_data/erfs/old/datatable.py 
b/openfisca_france_data/erfs/old/datatable.py index b2eac78b..3de1bee4 100644 --- a/openfisca_france_data/erfs/old/datatable.py +++ b/openfisca_france_data/erfs/old/datatable.py @@ -1,5 +1,4 @@ import os -import pkg_resources import sys import gc @@ -18,8 +17,9 @@ #from openfisca_france.data.sources.config import DATA_DIR -openfisca_france_location = pkg_resources.get_distribution('openfisca-france-data').location -CONFIG_DIR = os.path.join(openfisca_france_location) +from openfisca_france_data import openfisca_france_data_location + +CONFIG_DIR = os.path.join(openfisca_france_data_location) #ERF_HDF5_DATA_DIR = os.path.join(SRC_PATH,'countries','france','data', 'erf') diff --git a/openfisca_france_data/erfs/scenario.py b/openfisca_france_data/erfs/scenario.py index 46ad488b..d3d6148a 100644 --- a/openfisca_france_data/erfs/scenario.py +++ b/openfisca_france_data/erfs/scenario.py @@ -41,5 +41,5 @@ class ErfsSurveyScenario(AbstractErfsSurveyScenario): 'zone_apl', ] - def __init__(self, year: int) -> None: - self.year = year + def __init__(self, period: int) -> None: + self.period = period diff --git a/openfisca_france_data/erfs_fpr/get_survey_scenario.py b/openfisca_france_data/erfs_fpr/get_survey_scenario.py index 448467fc..ed626143 100644 --- a/openfisca_france_data/erfs_fpr/get_survey_scenario.py +++ b/openfisca_france_data/erfs_fpr/get_survey_scenario.py @@ -10,6 +10,7 @@ from openfisca_france_data.erfs_fpr.scenario import ErfsFprSurveyScenario from openfisca_france_data import france_data_tax_benefit_system +from openfisca_survey_manager import default_config_files_directory from openfisca_france_data.model.id_variables import ( idmen_original, @@ -102,6 +103,7 @@ def get_survey_scenario( variation_factor: float = 0.03, varying_variable: str = None, survey_name: str = "input", + config_files_directory : str = default_config_files_directory, ) -> ErfsFprSurveyScenario: """Helper pour créer un `ErfsFprSurveyScenario`. 
@@ -126,14 +128,14 @@ def get_survey_scenario( survey_scenario = ErfsFprSurveyScenario.create( tax_benefit_system = tax_benefit_system, baseline_tax_benefit_system = baseline_tax_benefit_system, - year = year, + period = year, ) else: assert varying_variable is not None, "You need to specify the varying variable." survey_scenario = ErfsFprSurveyScenario.create( tax_benefit_system = tax_benefit_system, baseline_tax_benefit_system = baseline_tax_benefit_system, - year = year, + period = year, ) # taux marginaux !! survey_scenario.variation_factor = variation_factor @@ -153,6 +155,8 @@ def get_survey_scenario( input_data_table_by_entity_by_period = input_data_table_by_entity_by_period, survey = survey_name ) + data["config_files_directory"] = config_files_directory + # Les données peuvent venir en différents formats : # diff --git a/openfisca_france_data/erfs_fpr/scenario.py b/openfisca_france_data/erfs_fpr/scenario.py index 7e9711d3..eef10cd5 100644 --- a/openfisca_france_data/erfs_fpr/scenario.py +++ b/openfisca_france_data/erfs_fpr/scenario.py @@ -63,8 +63,9 @@ class ErfsFprSurveyScenario(AbstractErfsSurveyScenario): "wprm_init", ] - def __init__(self, year: int) -> None: - self.year = year + def __init__(self, period: int) -> None: + # self.year = period + self.period = period @classmethod def build_input_data(cls, year: int) -> None: diff --git a/openfisca_france_data/model/common.py b/openfisca_france_data/model/common.py index 13049ba8..c798e5f8 100644 --- a/openfisca_france_data/model/common.py +++ b/openfisca_france_data/model/common.py @@ -190,10 +190,10 @@ def formula(foyer_fiscal, period): weight_foyers = foyer_fiscal('weight_foyers', period) menage_ordinaire_foyers_fiscaux = foyer_fiscal('menage_ordinaire_foyers_fiscaux', period) labels = arange(1, 11) + method = 2 + decile, values = mark_weighted_percentiles(rfr, labels, weight_foyers * menage_ordinaire_foyers_fiscaux, method, return_quantiles = True) # Alternative method - # method = 2 - # decile, 
values = mark_weighted_percentiles(niveau_de_vie, labels, pondmen, method, return_quantiles = True) - decile, values = weighted_quantiles(rfr, labels, weight_foyers * menage_ordinaire_foyers_fiscaux, return_quantiles = True) + # decile, values = weighted_quantiles(rfr, labels, weight_foyers * menage_ordinaire_foyers_fiscaux, return_quantiles = True) return decile @@ -228,11 +228,9 @@ def formula(foyer_fiscal, period): weight_foyers = foyer_fiscal('weight_foyers', period) menage_ordinaire_foyers_fiscaux = foyer_fiscal('menage_ordinaire_foyers_fiscaux', period) labels = arange(1, 11) - # Alternative method - # method = 2 - # decile, values = mark_weighted_percentiles(niveau_de_vie, labels, pondmen, method, return_quantiles = True) - decile, values = weighted_quantiles( - rfr / nbptr, labels, weight_foyers * menage_ordinaire_foyers_fiscaux, return_quantiles = True) + method = 2 + decile, values = mark_weighted_percentiles( + rfr / nbptr, labels, weight_foyers * menage_ordinaire_foyers_fiscaux, method, return_quantiles = True) return decile diff --git a/openfisca_france_data/model/survey_variables.py b/openfisca_france_data/model/survey_variables.py index e9cde28c..16ef928b 100644 --- a/openfisca_france_data/model/survey_variables.py +++ b/openfisca_france_data/model/survey_variables.py @@ -4,7 +4,7 @@ class menage_ordinaire(Variable): value_type = int is_period_size_independent = True - default_value = True + default_value = 1 entity = Menage definition_period = YEAR diff --git a/openfisca_france_data/surveys.py b/openfisca_france_data/surveys.py index a595570d..32e9b156 100644 --- a/openfisca_france_data/surveys.py +++ b/openfisca_france_data/surveys.py @@ -7,12 +7,13 @@ from openfisca_core.model_api import Enum # type: ignore from openfisca_core.taxbenefitsystems import TaxBenefitSystem # type: ignore from openfisca_france_data import base_survey as base # type: ignore -from openfisca_survey_manager.scenarios import AbstractSurveyScenario # type: ignore +from 
openfisca_survey_manager.scenarios.reform_scenario import ReformScenario # type: ignore + log = logging.getLogger(__name__) -class AbstractErfsSurveyScenario(AbstractSurveyScenario): +class AbstractErfsSurveyScenario(ReformScenario): """ Parties communes entre ERFS et ERFS PFR @@ -61,7 +62,7 @@ def build_input_data_from_test_case(self, test_case_scenario): ) array_by_variable = dict() - period = periods.period(str(self.year)) + period = periods.period(str(self.period)) for variable in self.used_as_input_variables: array_by_variable[variable] = simulation.calculate_add( @@ -87,10 +88,10 @@ def create( input_data_type = None, reform = None, reform_key = None, - year: int = None, + period: int = None, ): - assert year is not None + assert period is not None assert not ((reform is not None) and (reform_key is not None)) reform_is_provided = (reform is not None) or (reform_key is not None) @@ -111,18 +112,22 @@ def create( tax_benefit_system = reform if input_data_type is not None: - survey_scenario = cls(input_data_type = input_data_type, year = year) + survey_scenario = cls(input_data_type = input_data_type, period = period) else: - survey_scenario = cls(year = year) + survey_scenario = cls(period = period) - survey_scenario.set_tax_benefit_systems( - tax_benefit_system = tax_benefit_system, - baseline_tax_benefit_system = baseline_tax_benefit_system, - ) + if baseline_tax_benefit_system: + survey_scenario.set_tax_benefit_systems(dict( + reform = tax_benefit_system, + baseline = baseline_tax_benefit_system, + )) + else: + survey_scenario.set_tax_benefit_systems(dict( + baseline = tax_benefit_system, + )) - survey_scenario.year = year - survey_scenario.period = year + survey_scenario.period = period return survey_scenario @@ -155,22 +160,22 @@ def custom_initialize(self, simulation): # 'chomage_brut', ] - three_year_span_variables = input_variables + computed_variables_used_as_input + three_period_span_variables = input_variables + computed_variables_used_as_input - 
simulation_period = periods.period(self.year) - for variable in three_year_span_variables: + simulation_period = periods.period(self.period) + for variable in three_period_span_variables: assert variable in self.used_as_input_variables, \ f"{variable} is not a in the input_varaibles to be used {self.used_as_input_variables}" # noqa: E501 - if self.tax_benefit_system.variables[variable].value_type == Enum: + if simulation.tax_benefit_system.variables[variable].value_type == Enum: permanent_value = simulation.calculate( variable, - period = periods.period(self.year).first_month, + period = periods.period(self.period).first_month, ) else: permanent_value = simulation.calculate_add( variable, - period = self.year, + period = self.period, ) for offset in [-1, -2]: @@ -183,7 +188,6 @@ def custom_initialize(self, simulation): except ValueError as e: log.debug(f"Dealing with: {e}") if sum(simulation.calculate_add(variable, simulation_period.offset(offset))) != sum(permanent_value): - use_baseline = self.baseline_simulation == simulation simulation.delete_arrays(variable, simulation_period.offset(offset)) simulation.set_input( variable, diff --git a/setup.py b/setup.py index 47c9ea66..a49ead52 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name = "OpenFisca-France-Data", - version = "2.0.7", + version = "3.0.0", description = "OpenFisca-France-Data module to work with French survey data", long_description = long_description, long_description_content_type="text/markdown", @@ -42,9 +42,8 @@ python_requires = ">=3.9", install_requires = [ "multipledispatch >=0.6.0, <1.0.0", - "OpenFisca-France >=150.0.0, <154.0.0", - "openFisca-survey-manager >=1, <2.0.0", - "wquantiles >=0.3.0, <1.0.0", # To compute weighted quantiles + "OpenFisca-France >=150.0.0, <155.0.0", + "openFisca-survey-manager >=2.0.0, <2.1.0", ], extras_require = { "test": [ diff --git a/tests/conftest.py b/tests/conftest.py index 1b6cc218..933e6350 100644 --- a/tests/conftest.py +++ 
b/tests/conftest.py @@ -35,7 +35,7 @@ def survey_scenario(tax_benefit_system: TaxBenefitSystem): def _survey_scenario(year: int) -> ErfsFprSurveyScenario: return ErfsFprSurveyScenario.create( tax_benefit_system = tax_benefit_system, - year = year, + period = year, ) return _survey_scenario diff --git a/tests/erfs_fpr/integration/test_pivot_table.py b/tests/erfs_fpr/integration/test_pivot_table.py index 4ba92652..81b0bc04 100644 --- a/tests/erfs_fpr/integration/test_pivot_table.py +++ b/tests/erfs_fpr/integration/test_pivot_table.py @@ -60,4 +60,4 @@ def get_survey_scenario(kind = 'erfs_fpr', year = None): year = 2009 SurveyScenario = ErfsSurveyScenario - return SurveyScenario.create(year = year) + return SurveyScenario.create(period = year) diff --git a/tests/test_aggregate.py b/tests/test_aggregate.py index 61adb5a4..1e6d3fc0 100644 --- a/tests/test_aggregate.py +++ b/tests/test_aggregate.py @@ -8,12 +8,13 @@ ) -@pytest.mark.skip( - reason = "FileNotFoundError: [Errno 2] No such file or directory: '/opt/hostedtoolcache/Python/3.9.9/x64/lib/python3.9/site-packages/openfisca_france_data/assets/agregats_tests_taxipp_2_0.xlsx'", - ) def test_erfs_survey_simulation(survey_scenario, fake_input_data, year: int = 2009): # On ititialise le survey scenario - survey_scenario = ErfsFprSurveyScenario.create(year) + survey_scenario = ErfsFprSurveyScenario.create( + tax_benefit_system = france_data_tax_benefit_system, + + period = year, + ) # On charge les données input_data = fake_input_data(year) @@ -23,16 +24,13 @@ def test_erfs_survey_simulation(survey_scenario, fake_input_data, year: int = 20 # On calcule les agrégats aggregates = Aggregates(survey_scenario = survey_scenario, target_source = 'taxipp') - aggregates.compute_aggregates(use_baseline = False) + aggregates.compute_aggregates() return aggregates.base_data_frame -@pytest.mark.skip( - reason = "FileNotFoundError: [Errno 2] No such file or directory: 
'/opt/hostedtoolcache/Python/3.9.9/x64/lib/python3.9/site-packages/openfisca_france_data/assets/agregats_tests_taxipp_2_0.xlsx'", - ) def test_erfs_fpr_aggregates_reform(fake_input_data, year:int = 2013): survey_scenario = ErfsFprSurveyScenario.create( - year = year, + period = year, reform_key = 'plf2015', baseline_tax_benefit_system = france_data_tax_benefit_system, ) diff --git a/tests/test_calibration.py b/tests/test_calibration.py index c56f0308..a9293435 100644 --- a/tests/test_calibration.py +++ b/tests/test_calibration.py @@ -1,14 +1,16 @@ -import pkg_resources import os import pytest from openfisca_survey_manager.calibration import Calibration # type: ignore +from openfisca_france_data import openfisca_france_data_location + + @pytest.fixture def location() -> str: - return pkg_resources.get_distribution("openfisca-france-data").location + return openfisca_france_data_location def test_calibration(survey_scenario, fake_input_data, location, year: int = 2009): @@ -22,16 +24,14 @@ def test_calibration(survey_scenario, fake_input_data, location, year: int = 200 survey_scenario.init_from_data(data = dict(input_data_frame = input_data)) # On fait la calibration - calibration = Calibration(survey_scenario) - calibration.parameters["method"] = "linear" - calibration.total_population = calibration.initial_total_population * 1.123 - - calibration.set_parameters("invlo", 3) - calibration.set_parameters("up", 3) - calibration.set_parameters("method", "logit") + parameters = dict( + method = "logit", + invlo = 3, + up = 3, + ) - calibration.calibrate() pre_cal_weight = survey_scenario.calculate_variable("wprm", period = year) - calibration.set_calibrated_weights() - assert pre_cal_weight * 1.123 == survey_scenario.calculate_variable("wprm", period = year) + target_entity_count = pre_cal_weight.sum() * 1.123 + survey_scenario.calibrate(target_entity_count = target_entity_count, entity = "menage", parameters = parameters, period = year) + assert pre_cal_weight * 1.123 
== survey_scenario.calculate_variable("wprm", period = year) diff --git a/tests/test_fake_survey_simulation.py b/tests/test_fake_survey_simulation.py index 47694fcf..9eacf4d0 100644 --- a/tests/test_fake_survey_simulation.py +++ b/tests/test_fake_survey_simulation.py @@ -10,29 +10,13 @@ from openfisca_france_data.reforms.old_openfisca_france_reforms.plf2015 import plf2015 # type: ignore -@pytest.fixture -def fake_calibration(tax_benefit_system): - def _fake_calibration(fake_input_data: pandas.DataFrame, year: int) -> Calibration: - input_data_frame = fake_input_data(year) - - survey_scenario = ErfsSurveyScenario.create( - tax_benefit_system = tax_benefit_system, - year = year, - ) - - survey_scenario.init_from_data(data = dict(input_data_frame = input_data_frame)) - - calibration = Calibration(survey_scenario = survey_scenario, period = year) - calibration.set_parameters("invlo", 3) - calibration.set_parameters("up", 3) - calibration.set_parameters("method", "logit") - - return calibration - - return _fake_calibration +parameters = dict( + invlo = 3, + up = 3, + method = "logit", + ) -@pytest.mark.skip(reason = "AssertionError: [0. 0.] != [20000. 
10000.]") def test_fake_survey_simulation(tax_benefit_system, fake_input_data, year: int = 2006): input_data_frame = fake_input_data(year) assert input_data_frame.salaire_imposable.loc[0] == 20000 @@ -40,7 +24,7 @@ def test_fake_survey_simulation(tax_benefit_system, fake_input_data, year: int = survey_scenario = ErfsSurveyScenario.create( tax_benefit_system = tax_benefit_system, - year = year, + period = year, ) survey_scenario.init_from_data(data = dict(input_data_frame = input_data_frame)) @@ -48,7 +32,7 @@ def test_fake_survey_simulation(tax_benefit_system, fake_input_data, year: int = assert (input_data_frame.salaire_imposable.loc[0] == 20000).all() assert (input_data_frame.salaire_imposable.loc[1] == 10000).all() - simulation = survey_scenario.simulation + simulation = survey_scenario.simulations["baseline"] salaire_imposable = simulation.calculate_add("salaire_imposable", period = year) @@ -60,29 +44,34 @@ def test_fake_survey_simulation(tax_benefit_system, fake_input_data, year: int = assert age[0] == 77 assert age[1] == 37 - sal_2003 = simulation.calculate_add("salaire_imposable", period = 2003) - sal_2004 = simulation.calculate_add("salaire_imposable", period = 2004) - sal_2005 = simulation.calculate_add("salaire_imposable", period = 2005) - sal_2006 = simulation.calculate_add("salaire_imposable", period = 2006) + retraite_2003 = simulation.calculate_add("retraite_imposable", period = 2003) + retraite_2004 = simulation.calculate_add("retraite_imposable", period = 2004) + retraite_2005 = simulation.calculate_add("retraite_imposable", period = 2005) + retraite_2006 = simulation.calculate_add("retraite_imposable", period = 2006) - assert (sal_2003 == 0).all() - assert (sal_2004 == sal_2006).all() - assert (sal_2005 == sal_2006).all() + assert (retraite_2003 == 0).all() + assert (retraite_2004 == retraite_2006).all() + assert (retraite_2005 == retraite_2006).all() for year, month in itertools.product(range(2003, 2004), range(1, 13)): - period = 
f"{year}-{month}" + if month < 10: + period = f"{year}-0{month}" + else: + period = f"{year}-{month}" assert ( simulation - .calculate_add("salaire_imposable", period = period) == 0 + .calculate_add("retraite_imposable", period = period) == 0 ).all() for year, month in itertools.product(range(2004, 2007), range(1, 13)): - period = f"{year}-{month}" + if month < 10: + period = f"{year}-0{month}" + else: + period = f"{year}-{month}" assert ( - simulation - .calculate("salaire_imposable", period = period) == sal_2006 / 12 + simulation.calculate("retraite_imposable", period = period) == retraite_2006 / 12 ).all() data_frame_by_entity = survey_scenario.create_data_frame_by_entity( @@ -91,6 +80,7 @@ def test_fake_survey_simulation(tax_benefit_system, fake_input_data, year: int = "activite", "rsa_base_ressources_i", "menage_ordinaire_individus", + "retraite_imposable", "salaire_imposable", "salaire_net", "autonomie_financiere", @@ -106,66 +96,84 @@ def test_fake_survey_simulation(tax_benefit_system, fake_input_data, year: int = "weight_familles", "revenu_disponible", "impo", - ] + ], ) - return data_frame_by_entity, simulation -@pytest.mark.skip( - reason = "AttributeError: 'Calibration' object has no attribute 'simulation'", - ) -def test_fake_calibration_float(fake_calibration, year: int = 2006): - calibration = fake_calibration(year) - calibration.total_population = calibration.initial_total_population * 1.123 +def test_fake_calibration_float(tax_benefit_system, fake_input_data, year: int = 2006): + input_data_frame = fake_input_data(year) + survey_scenario = ErfsSurveyScenario.create( + tax_benefit_system = tax_benefit_system, + period = year, + ) + survey_scenario.init_from_data(data = dict(input_data_frame = input_data_frame)) - revenu_disponible_target = 7e6 - calibration.set_target_margin("revenu_disponible", revenu_disponible_target) - calibration.calibrate() - calibration.set_calibrated_weights() + initial_count = 
survey_scenario.compute_aggregate("revenu_disponible", aggfunc = "count", period = year) + target_entity_count = initial_count * 1.123 - simulation = calibration.survey_scenario.simulation + revenu_disponible_target = 7.7e6 + target_margins_by_variable = dict(revenu_disponible = revenu_disponible_target) + + survey_scenario.calibrate( + period = year, + target_margins_by_variable = target_margins_by_variable, + target_entity_count = target_entity_count, + parameters=parameters, + ) assert_near( - simulation.calculate("wprm", period=year).sum(), - calibration.total_population, + survey_scenario.calculate_variable("wprm", period=year).sum(), + target_entity_count, absolute_error_margin = None, relative_error_margin = 0.00001, ) assert_near( ( - + simulation.calculate("revenu_disponible", period = year) - * simulation.calculate("wprm", period = year) + + survey_scenario.calculate_variable("revenu_disponible", period = year) + * survey_scenario.calculate_variable("wprm", period = year) ).sum(), revenu_disponible_target, absolute_error_margin = None, relative_error_margin = 0.00001, ) - @pytest.mark.skip( - reason = "ValueError: Length of values does not match length of index", + reason = "ValueError: Unable to compute variable 'age' for period 2006: 'age' must be computed for a whole month. 
Should use an ge with year definition period", ) -def test_fake_calibration_age(fake_calibration, fake_input_data, year: int = 2006): - calibration = fake_calibration(fake_input_data, year) - survey_scenario = calibration.survey_scenario - calibration.total_population = calibration.initial_total_population * 1.123 - calibration.set_target_margin("age", [95, 130]) - calibration.calibrate() - calibration.set_calibrated_weights() +def test_fake_calibration_age(tax_benefit_system, fake_input_data, year: int = 2006): + input_data_frame = fake_input_data(year) + period = year + survey_scenario = ErfsSurveyScenario.create( + tax_benefit_system = tax_benefit_system, + period = period, + ) + survey_scenario.init_from_data(data = dict(input_data_frame = input_data_frame)) - simulation = survey_scenario.simulation + initial_count = survey_scenario.compute_aggregate("wprm", aggfunc = "count", period = year) + target_entity_count = initial_count * 1.123 + + target_margins_by_variable = { + "age": [95, 130] + } + + survey_scenario.calibrate( + period = period, + target_margins_by_variable = target_margins_by_variable, + target_entity_count = target_entity_count, + parameters=parameters, + ) assert_near( - simulation.calculate("wprm").sum(), - calibration.total_population, + survey_scenario.calculate_variable("wprm", period=period).sum(), + target_entity_count, absolute_error_margin = None, relative_error_margin = 0.00001, ) - age = (survey_scenario.simulation.calculate("age"),) - weight_individus = survey_scenario.simulation.calculate("weight_individus") + age = survey_scenario.calculate_variable("age", period = period) + weight_individus = survey_scenario.calculate_variable("weight_individus", period = year) for category, target in calibration.margins_by_variable["age"]["target"].items(): actual = ((age == category) * weight_individus).sum() / weight_individus.sum() @@ -197,7 +205,7 @@ def test_calculate_irpp_before_and_after_plf2015( survey_scenario = 
ErfsSurveyScenario.create( tax_benefit_system = reform, baseline_tax_benefit_system = tax_benefit_system, - year = year, + period = year, ) # On charge les données