From 780657f4fd55545bba99d082822b569eb7fdd8e8 Mon Sep 17 00:00:00 2001 From: Mahdi Ben Jelloul Date: Tue, 28 Mar 2023 09:01:39 +0000 Subject: [PATCH 01/41] Add a test case creator --- .gitlab-ci.yml | 2 - ci-runner/build_ci.py | 1 - gitlab_ci/all_years_build_and_aggregates.yml | 1 - openfisca_france_data/__init__.py | 2 +- .../erfs_fpr/get_survey_scenario.py | 63 ++++++-- .../input_data_builder/step_05_final.py | 1 + .../erfs_fpr/test_case_creation.py | 146 ++++++++++++++++++ openfisca_france_data/model/base.py | 3 - setup.py | 9 +- tests/erfs_fpr/integration/test_aggregates.py | 4 +- 10 files changed, 208 insertions(+), 24 deletions(-) create mode 100644 openfisca_france_data/erfs_fpr/test_case_creation.py diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e41981c3..16d1e951 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -134,8 +134,6 @@ diagnostics: - cat ~/.config/openfisca-france-data/config.ini - compare-erfs-fpr-input -u -s -v - cp -r /mnt/data-out/openfisca-france-data/$OUT_FOLDER/figures_directory . - - ls -alrth - - ls -alrth figures_directory - cp -r ./figures_directory $ROOT_FOLDER/$OUT_FOLDER/data_output stage: diagnostics tags: diff --git a/ci-runner/build_ci.py b/ci-runner/build_ci.py index 014c0acc..6ec8fea1 100644 --- a/ci-runner/build_ci.py +++ b/ci-runner/build_ci.py @@ -111,7 +111,6 @@ def get_erfs_years(): raise KeyError - def build_gitlab_ci(erfs_years): gitlab_ci = header() # gitlab_ci += yaml.dump(make_test()) diff --git a/gitlab_ci/all_years_build_and_aggregates.yml b/gitlab_ci/all_years_build_and_aggregates.yml index 145e4557..66a4ff01 100644 --- a/gitlab_ci/all_years_build_and_aggregates.yml +++ b/gitlab_ci/all_years_build_and_aggregates.yml @@ -11,7 +11,6 @@ input_data-2019: - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini - cat ~/.config/openfisca-survey-manager/config.ini - - ls $ROOT_FOLDER/$OUT_FOLDER/data_collections - build-erfs-fpr -y 2019 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2019.h5 - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2019.ini stage: build_input_data diff --git a/openfisca_france_data/__init__.py b/openfisca_france_data/__init__.py index 8bb2ec7c..9b936da8 100644 --- a/openfisca_france_data/__init__.py +++ b/openfisca_france_data/__init__.py @@ -9,7 +9,7 @@ import openfisca_france # type: ignore # Load input variables and output variables into entities -from openfisca_france_data.model import common, survey_variables, id_variables # noqa analysis:ignore +from openfisca_france_data.model import common, survey_variables # noqa analysis:ignore from openfisca_france_data.model.base import * # noqa analysis:ignore diff --git a/openfisca_france_data/erfs_fpr/get_survey_scenario.py b/openfisca_france_data/erfs_fpr/get_survey_scenario.py index b55b379a..9c652d1b 100644 --- a/openfisca_france_data/erfs_fpr/get_survey_scenario.py +++ b/openfisca_france_data/erfs_fpr/get_survey_scenario.py @@ -2,13 +2,61 @@ from multipledispatch import dispatch # type: ignore +from openfisca_core.model_api import Variable, ADD, YEAR from openfisca_core.reforms import Reform # type: ignore from openfisca_core.taxbenefitsystems import TaxBenefitSystem # type: ignore +from openfisca_france.entities import Individu from openfisca_france_data.erfs_fpr.scenario import ErfsFprSurveyScenario from openfisca_france_data import france_data_tax_benefit_system +from openfisca_france_data.model.id_variables import ( + idmen_original, + noindiv, + ) + +variables_converted_to_annual = [ + "salaire_net", + "chomage_net", + "retraite_nette", + "ppa", + ] + + +class erfs_fpr_plugin(Reform): + name = "ERFS-FPR ids plugin" + + def apply(self): + + for variable in variables_converted_to_annual: + class_name = f"{variable}_annuel" + label = f"{variable} sur l'année entière" + + def formula_creator(variable): + def formula(individu, period): + result = individu(variable, period, options = [ADD]) + return result + + formula.__name__ = 'formula' + + return formula + + variable_instance = type(class_name, (Variable,), dict( + value_type = int, + entity = self.variables[variable].entity, + label = label, + definition_period = YEAR, + formula = formula_creator(variable), + )) + + self.add_variable(variable_instance) + del variable_instance + + self.add_variable(idmen_original) + self.add_variable(noindiv) + + def get_survey_scenario( year: int = None, rebuild_input_data: bool = False, @@ -40,15 +88,6 @@ def get_survey_scenario( baseline_tax_benefit_system, ) - - from openfisca_france_data.model.id_variables import ( - idmen_original, - noindiv, - ) - - tax_benefit_system.add_variable(idmen_original) - tax_benefit_system.add_variable(noindiv) - if not use_marginal_tax_rate: survey_scenario = ErfsFprSurveyScenario.create( tax_benefit_system = tax_benefit_system, @@ -114,7 +153,7 @@ def get_tax_benefit_system( tax_benefit_system: None, reform: Reform, ) -> TaxBenefitSystem: - return reform(france_data_tax_benefit_system) + return reform(erfs_fpr_plugin(france_data_tax_benefit_system)) # Appelé quand *tax_benefit_system* et *reform* sont `None` @@ -123,7 +162,7 @@ def get_tax_benefit_system( tax_benefit_system: None, reform: None, ) -> TaxBenefitSystem: - return france_data_tax_benefit_system + return erfs_fpr_plugin(france_data_tax_benefit_system) # Appelé quand *tax_benefit_system* est un :class:`TaxBenefitSystem` @@ -139,4 +178,4 @@ def get_baseline_tax_benefit_system( def get_baseline_tax_benefit_system( tax_benefit_system: None, ) -> TaxBenefitSystem: - return france_data_tax_benefit_system + return erfs_fpr_plugin(france_data_tax_benefit_system) diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py index 1d3e0abe..dd76a35f 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py @@ -127,6 +127,7 @@ def create_input_data_frame(temporary_store = None, year = None, export_flattene ) log.debug("End of create_input_data_frame") + def create_collectives_foyer_variables(individus, menages): menages_revenus_fonciers = menages[['idmen', 'rev_fonciers_bruts']].copy() idmens = menages_revenus_fonciers.query('(rev_fonciers_bruts > 0)')['idmen'].tolist() diff --git a/openfisca_france_data/erfs_fpr/test_case_creation.py b/openfisca_france_data/erfs_fpr/test_case_creation.py new file mode 100644 index 00000000..b2572da1 --- /dev/null +++ b/openfisca_france_data/erfs_fpr/test_case_creation.py @@ -0,0 +1,146 @@ +import click +import ipdb as pdb +import logging +import pandas as pd +import sys +import yaml + +from pandas.api.types import is_datetime64_any_dtype as is_datetime +from openfisca_core.model_api import ETERNITY + + +from openfisca_france_data import france_data_tax_benefit_system +from openfisca_france_data.erfs_fpr import original_id_by_entity +from openfisca_france_data.erfs_fpr.scenario import ErfsFprSurveyScenario +from openfisca_france_data.erfs_fpr.comparison import ErfsFprtoInputComparator +from openfisca_france_data.erfs_fpr.get_survey_scenario import variables_converted_to_annual + + +tax_benefit_system = france_data_tax_benefit_system +openfisca_variables_by_entity = dict( + ( + entity.key, + [variable_name for variable_name, variable in tax_benefit_system.variables.items() if variable.entity.key == entity.key], + ) + for entity in tax_benefit_system.entities + ) + +id_variable_by_entity_key = ErfsFprSurveyScenario.id_variable_by_entity_key +weight_variable_by_entity = ErfsFprSurveyScenario.weight_variable_by_entity + + +renaming_variables_to_annual = { + monthly_variable: f"{monthly_variable}_annuel" + for monthly_variable in variables_converted_to_annual + } + +def build_test(period, noindiv, target_variables = None): + if target_variables is None: + target_variables = ErfsFprtoInputComparator.default_target_variables + + comparator = ErfsFprtoInputComparator() + comparator.period = period + input_dataframe_by_entity, target_dataframe_by_entity = comparator.get_test_dataframes(rebuild = True, noindivs = [noindiv]) + + def convert_date_to_sting(dataframe): + date_columns = list(dataframe.select_dtypes(include=["datetime64"])) + dataframe[date_columns] = dataframe[date_columns].astype(str) + + def remove_non_openfisca_columns(dataframe): + openfisca_variables = set(sum([list(value) for value in openfisca_variables_by_entity.values()], [])).union(set(["noindiv", "idmen_original"])) + selected_variables = list(set(dataframe.columns).intersection(openfisca_variables)) + return dataframe[selected_variables] + + def build_test_dict(dataframe_by_entity, renaming_variables_to_annual = None): + input_by_entity = dict() + for entity, dataframe in dataframe_by_entity.items(): + convert_date_to_sting(dataframe) + identifier = "noindiv" if entity == "individu" else "idmen_original" + entity_plural = "individus" if entity == "individu" else "menages" + input_by_entity[entity_plural] = input = dict() + dataframe[identifier] = "id_" + dataframe[identifier].astype(str) + df = remove_non_openfisca_columns(dataframe).set_index(identifier) + for row, series in df.iterrows(): + series.drop( + ( + list(weight_variable_by_entity.values()) + + list(id_variable_by_entity_key.values()) + + list(original_id_by_entity.values()) + ), + inplace = True, + errors = "ignore", + ) + if renaming_variables_to_annual: + series.rename(renaming_variables_to_annual, inplace = True) + input[row] = series.dropna().to_dict() + + return input_by_entity + + input_by_entity = build_test_dict(input_dataframe_by_entity) + output_by_entity = build_test_dict(target_dataframe_by_entity, renaming_variables_to_annual) + + relative_error_margin = { + "default": 5e-3, + } + test = dict( + name = f"Observation {noindiv} on {period}", + reforms = "openfisca_france_data.erfs_fpr.get_survey_scenario.erfs_fpr_plugin", + max_spiral_loops = 4, + relative_error_margin = relative_error_margin, + period = period, + input = input_by_entity, + output = output_by_entity, + ) + return test + + + +def export_test_file(period, noindiv, test_case_name = None): + """ + Export a erfs-fpr input and output to an OpenFisca test case. + + Args: + period (int): simulation year + noindiv (int): individu id number + test_case_name (str, optional): _description_. Defaults to Name of the test case file. Defaults to 'test_case_erfs_fpr_NOINDIV'. + """ + if test_case_name is None: + test_case_name = f"test_case_erfs_fpr_{noindiv}" + + test_case_file_path = f'{test_case_name}.yaml' + test = build_test(period, noindiv) + + with open(test_case_file_path, 'w') as file: + yaml.dump(test, file, sort_keys=False) + + text = get_erfs_fpr_data_as_comment(noindiv) + + with open(test_case_file_path, "a+") as file: + _ = file.read() # this auto closes the file after reading, which is a good practice + file.write(text) + + +def get_erfs_fpr_data_as_comment(noind): + return "# Blabal" + + +@click.command() +@click.option('-n', '--noindiv', type = int, help = "Individual id number", required = True) +@click.option('-v', '--verbose', is_flag = True, default = False, help = "Increase output verbosity", show_default = True) +@click.option('-d', '--debug', is_flag = True, default = False, help = "Use python debugger", show_default = True) +def create_test(noindiv = 0, verbose = False, debug = False): + """Create test case for a specific ERFS FPR individual.""" + assert noindiv != 0, "Provide valid individual" + logging.basicConfig(level = logging.DEBUG if verbose else logging.WARNING, stream = sys.stdout) + from openfisca_france_data.erfs_fpr import REFERENCE_YEAR + period = REFERENCE_YEAR + try: + export_test_file(period, noindiv) + except Exception as e: + if debug: + pdb.post_mortem(sys.exc_info()[2]) + raise e + + +if __name__ == "__main__": + sys.exit(create_test()) diff --git a/openfisca_france_data/model/base.py b/openfisca_france_data/model/base.py index a954ca0e..cc508420 100644 --- a/openfisca_france_data/model/base.py +++ b/openfisca_france_data/model/base.py @@ -1,6 +1,3 @@ -from datetime import date - - from openfisca_france.model.base import * diff --git a/setup.py b/setup.py index 162783e4..99ee1db7 100644 --- a/setup.py +++ b/setup.py @@ -35,8 +35,10 @@ "click >= 8.0.0, < 9.0.0", "matplotlib >= 3.1.1, < 4.0.0", "multipledispatch >= 0.6.0, < 1.0.0", - "openFisca-france >= 145.0.0, < 146.0.0", - "openFisca-survey-manager >= 0.47.2, < 1.0.0", + # "openFisca-france >= 145.0.0, < 146.0.0", + "OpenFisca-France @ git+https://github.com/openfisca/openfisca-france.git@version_leap", + # "openFisca-survey-manager >= 0.47.2, < 1.0.0", + "OpenFisca-Survey-Manager @ git+https://github.com/openfisca/openfisca-survey-manager.git@version_leap", "wquantiles >= 0.3.0, < 1.0.0", # To compute weighted quantiles ], extras_require = { @@ -45,10 +47,11 @@ "bumpver >= 2022.1120", "dtale", "flake8 >= 3.7.0, < 3.8.0", + "ipdb >=0.13, <1.0", "ipython >= 7.5.0, < 8.0.0", "mypy >= 0.670, < 1.0.0", "pypandoc", - 'pytest >= 5.0.0, < 7.0.0', + # "pytest", # "pytest-cov >= 2.6.0, < 3.0.0", "scipy >= 1.2.1, < 2.0.0", "toolz >= 0.9.0, < 1.0.0", diff --git a/tests/erfs_fpr/integration/test_aggregates.py b/tests/erfs_fpr/integration/test_aggregates.py index 479888f2..3473bc70 100644 --- a/tests/erfs_fpr/integration/test_aggregates.py +++ b/tests/erfs_fpr/integration/test_aggregates.py @@ -10,7 +10,9 @@ import os + from openfisca_france_data import france_data_tax_benefit_system +from openfisca_france_data.erfs_fpr import REFERENCE_YEAR from openfisca_france_data.erfs_fpr.get_survey_scenario import get_survey_scenario from openfisca_france_data.aggregates import FranceAggregates as Aggregates @@ -76,7 +78,7 @@ def test_erfs_fpr_aggregates_reform(): @click.command() -@click.option('-y', '--year', 'year', default = 2018, help = "ERFS-FPR year", show_default = True, +@click.option('-y', '--year', 'year', default = REFERENCE_YEAR, help = "ERFS-FPR year", show_default = True, type = int, required = True) @click.option('-c', '--configfile', default = None, help = 'raw_data.ini path to read years to process.', show_default = True) From 442110882497b4ce262eb02e25d7b9315337284a Mon Sep 17 00:00:00 2001 From: Mahdi Ben Jelloul Date: Wed, 29 Mar 2023 07:17:22 +0000 Subject: [PATCH 02/41] Fix traitement brut erfs fpr --- openfisca_france_data/common.py | 30 +-- openfisca_france_data/erfs/scenario.py | 2 + openfisca_france_data/erfs_fpr/comparison.py | 24 +-- .../erfs_fpr/get_survey_scenario.py | 3 + .../step_03_variables_individuelles.py | 199 +++--------------- .../input_data_builder/step_05_final.py | 3 +- openfisca_france_data/erfs_fpr/scenario.py | 2 +- openfisca_france_data/surveys.py | 11 +- 8 files changed, 70 insertions(+), 204 deletions(-) diff --git a/openfisca_france_data/common.py b/openfisca_france_data/common.py index de8cf677..2507df29 100644 --- a/openfisca_france_data/common.py +++ b/openfisca_france_data/common.py @@ -230,15 +230,15 @@ def create_traitement_indiciaire_brut(individus, period = None, revenu_type = 'i contrat_de_travail = individus.contrat_de_travail heures_remunerees_volume = individus.heures_remunerees_volume - legislation = tax_benefit_system.parameters(period.start) + parameters = tax_benefit_system.parameters(period.start) - salarie = legislation.cotsoc.cotisations_salarie - plafond_securite_sociale_mensuel = legislation.prelevements_sociaux.pss.plafond_securite_sociale_mensuel - legislation_csg_deductible = legislation.prelevements_sociaux.contributions_sociales.csg.activite.deductible - taux_csg = legislation_csg_deductible.taux - taux_abattement = legislation_csg_deductible.abattement.rates[0] + salarie = parameters.cotsoc.cotisations_salarie + plafond_securite_sociale_mensuel = parameters.prelevements_sociaux.pss.plafond_securite_sociale_mensuel + parameters_csg_deductible = parameters.prelevements_sociaux.contributions_sociales.csg.activite.deductible + taux_csg = parameters_csg_deductible.taux + taux_abattement = parameters_csg_deductible.abattement.rates[0] try: - seuil_abattement = legislation_csg_deductible.abattement.thresholds[1] + seuil_abattement = parameters_csg_deductible.abattement.thresholds[1] except IndexError: # Pour gérer le fait que l'abattement n'a pas toujours été limité à 4 PSS seuil_abattement = None csg_deductible = MarginalRateTaxScale(name = 'csg_deductible') @@ -250,11 +250,11 @@ def create_traitement_indiciaire_brut(individus, period = None, revenu_type = 'i # Cas des revenus nets: # comme les salariés du privé, on ajoute CSG imposable et crds qui s'appliquent à tous les revenus # 1. csg imposable - legislation_csg_imposable = legislation.prelevements_sociaux.contributions_sociales.csg.activite.imposable - taux_csg = legislation_csg_imposable.taux - taux_abattement = legislation_csg_imposable.abattement.rates[0] + parameters_csg_imposable = parameters.prelevements_sociaux.contributions_sociales.csg.activite.imposable + taux_csg = parameters_csg_imposable.taux + taux_abattement = parameters_csg_imposable.abattement.rates[0] try: - seuil_abattement = legislation_csg_imposable.abattement.thresholds[1] + seuil_abattement = parameters_csg_imposable.abattement.thresholds[1] except IndexError: # Pour gérer le fait que l'abattement n'a pas toujours été limité à 4 PSS seuil_abattement = None csg_imposable = MarginalRateTaxScale(name = 'csg_imposable') @@ -262,11 +262,11 @@ def create_traitement_indiciaire_brut(individus, period = None, revenu_type = 'i if seuil_abattement is not None: csg_imposable.add_bracket(seuil_abattement, taux_csg) # 2. crds - legislation_crds = legislation.prelevements_sociaux.contributions_sociales.crds.activite - taux_csg = legislation_crds.taux - taux_abattement = legislation_crds.abattement.rates[0] + parameters_crds = parameters.prelevements_sociaux.contributions_sociales.crds.activite + taux_csg = parameters_crds.taux + taux_abattement = parameters_crds.abattement.rates[0] try: - seuil_abattement = legislation_crds.abattement.thresholds[1] + seuil_abattement = parameters_crds.abattement.thresholds[1] except IndexError: # Pour gérer le fait que l'abattement n'a pas toujours été limité à 4 PSS seuil_abattement = None crds = MarginalRateTaxScale(name = 'crds') diff --git a/openfisca_france_data/erfs/scenario.py b/openfisca_france_data/erfs/scenario.py index 19195397..46ad488b 100644 --- a/openfisca_france_data/erfs/scenario.py +++ b/openfisca_france_data/erfs/scenario.py @@ -26,6 +26,7 @@ class ErfsSurveyScenario(AbstractErfsSurveyScenario): 'nbN', 'nbR', 'pensions_alimentaires_percues', + 'primes_fonction_publique', 'rag', 'retraite_brute', 'retraite_imposable', @@ -36,6 +37,7 @@ class ErfsSurveyScenario(AbstractErfsSurveyScenario): 'statut_marital', 'statut_occupation_logement', 'taxe_habitation', + 'traitement_indiciaire_brut', 'zone_apl', ] diff --git a/openfisca_france_data/erfs_fpr/comparison.py b/openfisca_france_data/erfs_fpr/comparison.py index a649f4bf..f8e0b170 100644 --- a/openfisca_france_data/erfs_fpr/comparison.py +++ b/openfisca_france_data/erfs_fpr/comparison.py @@ -15,15 +15,15 @@ openfisca_by_erfs_fpr_variables = { - "chomage_i": "chomage_net", + "chomage_i": "chomage_imposable", "ident": "idmen_original", "noindiv": "noindiv", - "rag_i": "rag_net", - "retraites_i": "retraite_nette", # TODO: CHECk + "rag_i": "rag", + "retraites_i": "retraite_imposable", # TODO: CHECk "rev_fonciers_bruts": "f4ba", - "ric_i": "ric_net", - "rnc_i": "rnc_net", - "salaires_i": "salaire_net", + "ric_i": "ric", + "rnc_i": "rnc", + "salaires_i": "salaire_imposable", } @@ -31,13 +31,13 @@ class ErfsFprtoInputComparator(AbstractComparator): name = "erfs_fpr" period = None default_target_variables = [ - "chomage_net", - # "rag_net", TODO: does not exist in openfisca - "retraite_nette", - # "ric_net", TODO: does not exist in openfisca - # "rnc_net", TODO: does not exist in openfisca + "chomage_imposable", + # "rag", + "retraite_imposable", + # "ric", + # "rnc", # "f4ba", - "salaire_net", + "salaire_imposable", ] def compute_test_dataframes(self): diff --git a/openfisca_france_data/erfs_fpr/get_survey_scenario.py b/openfisca_france_data/erfs_fpr/get_survey_scenario.py index 9c652d1b..290d2f1a 100644 --- a/openfisca_france_data/erfs_fpr/get_survey_scenario.py +++ b/openfisca_france_data/erfs_fpr/get_survey_scenario.py @@ -17,6 +17,9 @@ ) variables_converted_to_annual = [ + "salaire_imposable", + "chomage_imposable", + "retraite_imposable", "salaire_net", "chomage_net", "retraite_nette", diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py index 7b36012a..7b895658 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py @@ -7,7 +7,6 @@ from openfisca_france_data.common import ( create_salaire_de_base, create_traitement_indiciaire_brut, - create_revenus_remplacement_bruts, ) from openfisca_france_data import openfisca_france_tax_benefit_system from openfisca_france_data.smic import ( @@ -30,13 +29,13 @@ def build_variables_individuelles(temporary_store = None, year = None): individus = temporary_store['individus_{}_post_01'.format(year)] openfisca_by_erfs_variable = { - 'chomage_i': 'chomage_net', + 'chomage_i': 'chomage_imposable', 'pens_alim_recue_i': 'pensions_alimentaires_percues', 'rag_i': 'rag_net', - 'retraites_i': 'retraite_nette', + 'retraites_i': 'retraite_imposable', 'ric_i': 'ric_net', 'rnc_i': 'rnc_net', - 'salaires_i': 'salaire_net', + 'salaires_i': 'salaire_imposable', } for variable in openfisca_by_erfs_variable.keys(): @@ -48,104 +47,55 @@ def build_variables_individuelles(temporary_store = None, year = None): ) create_variables_individuelles(individus, year) assert 'salaire_de_base' in individus.columns , 'salaire de base not in individus' + assert 'traitement_indiciaire_brut' in individus.columns , 'traitement indiciaire brut not in individus' + assert 'primes_fonction_publique' in individus.columns , 'primes fonction publique not in individus' temporary_store['individus_{}'.format(year)] = individus return individus # helpers -def create_variables_individuelles(individus, year, survey_year = None): +def create_variables_individuelles(individus, year, survey_year = None, revenu_type = 'imposable'): """Création des variables individuelles""" + period = periods.period(year) + tax_benefit_system = openfisca_france_tax_benefit_system + # variables démographiques create_ages(individus, year) create_date_naissance(individus, age_variable = None, annee_naissance_variable = 'naia', mois_naissance = 'naim', year = year) + # Base pour constituer les familles, foyers, etc. + create_statut_matrimonial(individus) + + # variable d'activite create_activite(individus) - revenu_type = 'net' - period = periods.period(year) - create_revenus(individus, revenu_type = revenu_type) create_contrat_de_travail(individus, period = period, salaire_type = revenu_type) create_categorie_salarie(individus, period = period, survey_year = survey_year) - # Il faut que la base d'input se fasse au millésime des données - # On fait ça car, aussi bien le TaxBenefitSystem et celui réformé peuvent être des réformes - # Par exemple : si je veux calculer le diff entre le PLF2019 et un ammendement, - # je besoin d'un droit courant comme même du droit courrant pour l'année des données - tax_benefit_system = openfisca_france_tax_benefit_system - - # On n'a pas le salaire brut mais le salaire net ou imposable, on doit l'inverser + # inversion des revenus pour retrouver le brut + # pour les revenus de remplacement on a la csg et la crds dans l'erfs-fpr donc on peut avoir le brut directement + create_revenus_remplacement_bruts(individus) + # On n'a pas le salaire et le traitement_indiciaire brut, on doit l'inverser + # comme on a la crds et la csg non déductible on recalcule l'imposable puis on inverse l'imposable pour avoir le brut + #individus['salaire_imposable'] = individus.salaire_net + individus.csg_nd_crds_sal_i create_salaire_de_base( individus, period = period, revenu_type = revenu_type, tax_benefit_system = tax_benefit_system ) - + create_traitement_indiciaire_brut( + individus, + period = period, + revenu_type = revenu_type, + tax_benefit_system = tax_benefit_system) + # Pour les cotisations patronales qui varient avec la taille de l'entreprise' create_effectif_entreprise(individus, period = period, survey_year = survey_year) - # Base pour constituer les familles, foyers, etc. - create_statut_matrimonial(individus) - assert 'salaire_de_base' in individus.columns , 'salaire de base not in individus' return individus -def create_individu_variables_brutes(individus, revenu_type = None, period = None, - tax_benefit_system = None, mass_by_categorie_salarie = None, - calibration_eec = False): - """ - Crée les variables brutes de revenus: - - salaire_de_base - - traitement_indiciaire_brut - - primes_fonction_publique - - retraite_bruite - - chomage_brut - à partir des valeurs nettes ou imposables de ces revenus - et d'autres information individuelles - """ - assert revenu_type in ['imposable', 'net'] - assert period is not None - assert tax_benefit_system is not None - - assert 'age' in individus.columns - - created_variables = [] - create_contrat_de_travail(individus, period = period, salaire_type = revenu_type) - created_variables.append('contrat_de_travail') - created_variables.append('heures_remunerees_volume') - - create_categorie_salarie(individus, period = period) - created_variables.append('categorie_salarie') - create_categorie_non_salarie(individus) - created_variables.append('categorie_non_salarie') - - # FIXME: categorie_non_salarie modifie aussi categorie_salarie !! - if (mass_by_categorie_salarie is not None) & (calibration_eec is True): - calibrate_categorie_salarie(individus, year = None, mass_by_categorie_salarie = mass_by_categorie_salarie) - - create_salaire_de_base(individus, period = period, revenu_type = revenu_type, tax_benefit_system = tax_benefit_system) - created_variables.append('salaire_de_base') - - create_effectif_entreprise(individus, period = period) - created_variables.append('effectif_entreprise') - - create_traitement_indiciaire_brut(individus, period = period, revenu_type = revenu_type, - tax_benefit_system = tax_benefit_system) - created_variables.append('traitement_indiciaire_brut') - created_variables.append('primes_fonction_publique') - - create_taux_csg_remplacement(individus, period, tax_benefit_system) - created_variables.append('taux_csg_remplacement') - created_variables.append('taux_csg_remplacement_n_1') - created_variables.append('rfr_special_csg_n') - created_variables.append('rfr_special_csg_n_1') - - create_revenus_remplacement_bruts(individus, period, tax_benefit_system) - created_variables.append('chomage_brut') - created_variables.append('retraite_brute') - return created_variables - - def create_activite(individus): """Création de la variable activite. @@ -540,31 +490,6 @@ def create_categorie_non_salarie(individus): profession_liberale, 'categorie_non_salarie' ] = 3 - # Correction fonction publique - individus.loc[ - ( - (individus.categorie_salarie == 0) - & (individus.cstot.isin([31, 33, 34, 35, 37, 38,])) - ), - 'categorie_salarie' - ] = 1 - - # Correction encadrement - individus.loc[ - ( - (individus.categorie_salarie == 0) - & (individus.cstot.isin([31, 34, 35, 37 ,38])) # Cadres hors FP - ), - 'categorie_salarie' - ] = 1 - # Correction fonction publique - individus.loc[ - ( - (individus.categorie_salarie.isin([0, 1, 7])) - & (individus.cstot == 53) # Policiers et militaires reversé dans titulaire état - ), - 'categorie_salarie' - ] = 2 def create_contrat_de_travail(individus, period, salaire_type = 'imposable'): @@ -971,74 +896,14 @@ def create_effectif_entreprise(individus, period = None, survey_year = None): individus.effectif_entreprise.value_counts(dropna = False))) -def create_revenus(individus, revenu_type = 'imposable'): - """Création des plusieurs variablesde revenu. - - Ces variables sont: - chomage_net, - pensions_alimentaires_percues, - rag_net, - retraite_nette, - ric_net, - rnc_net, - et éventuellement, si revenus_type = 'imposable' des variables: - chomage_imposable, - rag, - retraite_imposable, - ric, - rnc, - salaire_imposable, +def create_revenus_remplacement_bruts(individus): """ - individus['chomage_brut'] = individus.csgchod_i + individus.chomage_net - individus['retraite_brute'] = individus.csgrstd_i + individus.retraite_nette - - if revenu_type == 'imposable': - variables = [ - # 'pension_alimentaires_percues', - 'chomage_imposable', - 'retraite_imposable', - ] - for variable in variables: - assert variable in individus.columns.tolist(), "La variable {} n'est pas présente".format(variable) - - for variable in variables: - if (individus[variable] < 0).any(): - - negatives_values = individus[variable].value_counts().loc[individus[variable].value_counts().index < 0] - log.debug("La variable {} contient {} valeurs négatives\n {}".format( - variable, - negatives_values.sum(), - negatives_values, - ) - ) - - # csg des revenus de replacement - # 0 - Non renseigné/non pertinent - # 1 - Exonéré - # 2 - Taux réduit - # 3 - Taux plein - taux = pd.concat( - [ - individus.csgrstd_i / individus.retraite_brute, - individus.csgchod_i / individus.chomage_brut, - ], - axis = 1 - ).max(axis = 1) - - # taux.loc[(0 < taux) & (taux < .1)].hist(bins = 100) - individus['taux_csg_remplacement'] = np.select( - [ - taux.isnull(), - taux.notnull() & (taux < 0.021), - taux.notnull() & (taux > 0.021) & (taux < 0.0407), - taux.notnull() & (taux > 0.0407) - ], - [0, 1, 2, 3] - ) - for value in [0, 1, 2, 3]: - assert (individus.taux_csg_remplacement == value).any(), \ - "taux_csg_remplacement ne prend jamais la valeur {}".format(value) - assert individus.taux_csg_remplacement.isin(range(4)).all() + Reconstitution des variables de retraite et chomage brut à partir des variables nettes et des variables de csg et crds + """ + + # revenu_brut = revenu_net + csg_deductible + csg_non_deductible_crds + individus['chomage_brut'] = individus.chomage_imposable + individus.csgchod_i #+ individus. csg_nd_crds_cho_i + individus['retraite_brute'] = individus.retraite_imposable + individus.csgrstd_i #+ individus.csg_nd_crds_ret_i def create_statut_matrimonial(individus): diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py index dd76a35f..fccaa092 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py @@ -42,7 +42,8 @@ def create_input_data_frame(temporary_store = None, year = None, export_flattene 'statut_marital', # 'salaire_imposable', 'salaire_de_base', - 'taux_csg_remplacement', + "traitement_indiciaire_brut", + "primes_fonction_publique", ] # TODO: fix this simplistic inference diff --git a/openfisca_france_data/erfs_fpr/scenario.py b/openfisca_france_data/erfs_fpr/scenario.py index a72a0d9d..dca44945 100644 --- a/openfisca_france_data/erfs_fpr/scenario.py +++ b/openfisca_france_data/erfs_fpr/scenario.py @@ -27,8 +27,8 @@ class ErfsFprSurveyScenario(AbstractErfsSurveyScenario): "retraite_imposable", "ric", "rnc", - "statut_marital", "salaire_de_base", + "statut_marital", "statut_occupation_logement", "taxe_habitation", "traitement_indiciaire_brut", diff --git a/openfisca_france_data/surveys.py b/openfisca_france_data/surveys.py index b2463c60..0ccc5c58 100644 --- a/openfisca_france_data/surveys.py +++ b/openfisca_france_data/surveys.py @@ -146,11 +146,13 @@ def custom_initialize(self, simulation): ] computed_variables_used_as_input = [ - # 'chomage_brut', "chomage_imposable", + "primes_fonction_publique", "retraite_brute", "retraite_imposable", "salaire_de_base", + "traitement_indiciaire_brut", + # 'chomage_brut', ] three_year_span_variables = input_variables + computed_variables_used_as_input @@ -194,13 +196,6 @@ def custom_input_data_frame(self, input_data_frame, **kwargs): if "loyer" in input_data_frame: input_data_frame["loyer"] = 12 * input_data_frame.loyer - if "categorie_salarie" in input_data_frame: - log.debug("Setting categorie_salarie to 0 and 1 only") - input_data_frame.loc[ - input_data_frame.categorie_salarie.isin(range(2, 7)), - "categorie_salarie", - ] = 1 - for variable in ["quifam", "quifoy", "quimen"]: if variable in input_data_frame: log.debug(input_data_frame[variable].value_counts(dropna = False)) From 54d5054b531c8e68e658c9136a32da8de556b73e Mon Sep 17 00:00:00 2001 From: Mahdi Ben Jelloul Date: Fri, 31 Mar 2023 08:49:21 +0000 Subject: [PATCH 03/41] Ajoute les variables du revenu du capital --- openfisca_france_data/comparator.py | 52 ++- openfisca_france_data/debugger.py | 437 ------------------ openfisca_france_data/erfs_fpr/comparison.py | 14 + .../erfs_fpr/get_survey_scenario.py | 43 +- .../erfs_fpr/input_data_builder/__init__.py | 18 +- .../step_03_variables_individuelles.py | 17 +- .../input_data_builder/step_05_foyer.py | 61 +++ .../{step_05_final.py => step_06_final.py} | 65 ++- openfisca_france_data/erfs_fpr/scenario.py | 3 + 9 files changed, 219 insertions(+), 491 deletions(-) delete mode 100644 openfisca_france_data/debugger.py create mode 100644 openfisca_france_data/erfs_fpr/input_data_builder/step_05_foyer.py rename openfisca_france_data/erfs_fpr/input_data_builder/{step_05_final.py => step_06_final.py} (84%) diff --git a/openfisca_france_data/comparator.py b/openfisca_france_data/comparator.py index 2258d41e..39c27541 100644 --- a/openfisca_france_data/comparator.py +++ b/openfisca_france_data/comparator.py @@ -23,18 +23,20 @@ log = logging.getLogger(__name__) +def get_entity_original_id(survey_scenario, variable): + entity = survey_scenario.tax_benefit_system.variables[variable].entity.key + return "noindiv" if entity == "individu" else "idmen_original" + + def compute_result(variable, survey_scenario, target_dataframe): result = None stats = None - entity = survey_scenario.tax_benefit_system.variables[variable].entity.key - entity_original_id = "noindiv" if entity == "individu" else "ident" + entity_original_id = get_entity_original_id(survey_scenario, variable) output_variables = [entity_original_id, variable] - entity_dataframe = survey_scenario.create_data_frame_by_entity( variables = output_variables, )[entity] - target = target_dataframe[output_variables].rename(columns = {variable: f"target_{variable}"}) if f"target_{variable}" not in target: @@ -47,12 +49,12 @@ def compute_result(variable, survey_scenario, target_dataframe): ) result[f"diff_{variable}"] = result[variable] - result[f"target_{variable}"] - result_variables = ["noindiv", variable, f"diff_{variable}", f"target_{variable}"] - stats = compute_error_stats(result, variable) + result_variables = [entity_original_id, variable, f"diff_{variable}", f"target_{variable}"] + stats = compute_error_stats(result, variable, entity_original_id = entity_original_id) return result, stats -def compute_confidence_interval(data, variable, width = .9): +def compute_confidence_interval(data, variable, width = .9, entity_original_id = None): """ Compute confidence interval @@ -64,9 +66,10 @@ def compute_confidence_interval(data, variable, width = .9): Returns: [type]: [description] """ + assert entity_original_id is not None df = pd.DataFrame({ "signed_values": data[variable].values, - "noind": data["noindiv"].values + "noind": data[entity_original_id].values }) df["abs_values"] = df.signed_values.abs() in_range_obs = ceil(width * len(df)) @@ -79,7 +82,8 @@ def compute_confidence_interval(data, variable, width = .9): return left, right, largest_errors -def compute_error_stats(data, variable): +def compute_error_stats(data, variable, entity_original_id): + assert entity_original_id is not None numerical = ( isinstance(data[variable].values.flat[0], np.integer) or isinstance(data[variable].values.flat[0], np.floating) @@ -89,13 +93,13 @@ def compute_error_stats(data, variable): df = data.loc[ (data[variable].values != 0.0) | (data[f"target_{variable}"].values != 0.0), - [variable, f"target_{variable}", "noindiv"] + [variable, f"target_{variable}", entity_original_id] ].copy() df["relative_error"] = (df[variable] - df[f"target_{variable}"]) / (df[f"target_{variable}"] + (df[f"target_{variable}"] == 0.0) * df[variable]) if df.empty: return - left, right, largest_errors = compute_confidence_interval(df, "relative_error") + left, right, largest_errors = compute_confidence_interval(df, "relative_error", entity_original_id = entity_original_id) less_than_5_pc_error = (df["relative_error"].abs() <= .05).sum() / len(df) less_than_20_pc_error = (df["relative_error"].abs() <= .2).sum() / len(df) more_than_80_pc_error = (df["relative_error"].abs() >= .8).sum() / len(df) @@ -130,7 +134,8 @@ def create_output_files(markdown_sections, figures_directory, filename): ) -def create_variable_distribution_figures(variable, result, bins = None, figures_directory = None): +def create_variable_distribution_figures(variable, result, bins = None, figures_directory = None, entity_original_id = None): + assert entity_original_id is not None log.debug(f"create_variable_distribution_figures: Examining {variable}") assert figures_directory is not None if bins is None: @@ -139,11 +144,12 @@ def create_variable_distribution_figures(variable, result, bins = None, figures_ non_both_zeroes = (result[f"{variable}"].fillna(0) != 0) | (result[f"target_{variable}"].fillna(0) != 0) non_both_zeroes_count = sum(non_both_zeroes) both_zeroes_count = len(result) - non_both_zeroes_count + melted = result.loc[ non_both_zeroes, - ["noindiv", variable, f"target_{variable}"] + [entity_original_id, variable, f"target_{variable}"] ].melt( - id_vars = ["noindiv"], + id_vars = [entity_original_id], value_vars = [f"{variable}", f"target_{variable}"] ) @@ -246,8 +252,8 @@ def create_variable_markdown_summary_section(variable, stats, figures_directory) return markdown_section -def create_diff_variable_distribution_figures(variable, result, bins = None, figures_directory = None): - +def create_diff_variable_distribution_figures(variable, result, bins = None, figures_directory = None, entity_original_id = None): + assert entity_original_id is not None numerical = ( isinstance(result[f"{variable}"].values.flat[0], np.integer) or isinstance(result[f"{variable}"].values.flat[0], np.floating) @@ -475,8 +481,18 @@ def compute_divergence(self, input_dataframe_by_entity, target_dataframe_by_enti ) result_by_variable[variable] = result - variable_distribution_figures_created = create_variable_distribution_figures(variable, result, figures_directory = figures_directory) - diff_variable_distribution_figures_created = create_diff_variable_distribution_figures(variable, result, figures_directory = figures_directory) + variable_distribution_figures_created = create_variable_distribution_figures( + variable, + result, + figures_directory = figures_directory, + entity_original_id = get_entity_original_id(survey_scenario, variable), + ) + diff_variable_distribution_figures_created = create_diff_variable_distribution_figures( + variable, + result, + figures_directory = figures_directory, + entity_original_id = get_entity_original_id(survey_scenario, variable) + ) stats_by_variable[variable] = stats variable_markdown_section = create_variable_markdown_section( diff --git a/openfisca_france_data/debugger.py b/openfisca_france_data/debugger.py deleted file mode 100644 index 8ed43ac2..00000000 --- a/openfisca_france_data/debugger.py +++ /dev/null @@ -1,437 +0,0 @@ -#! /usr/bin/env python -import logging - - -import numpy -from pandas import merge, concat, DataFrame - - -from openfisca_france_data.erfs.input_data_builder.base import ( - year_specific_by_generic_data_frame_name) -from openfisca_france_data.erfs import get_erf2of, get_of2erf -from openfisca_plugin_aggregates.aggregates import Aggregates -from openfisca_survey_manager.statshelpers import mark_weighted_percentiles as mwp -from openfisca_survey_manager.survey_collections import SurveyCollection - - -from openfisca_parsers import input_variables_extractors - -log = logging.getLogger(__name__) - - -def clean(parameter): - return parameter[:-len('_holder')] if parameter.endswith('_holder') else parameter - - -class Debugger(object): - def __init__(self): - super(Debugger, self).__init__() - self.erf_menage = None - self.erf_eec_indivi = None - self.of_menages_data_frame = None - self.of_individus_data_frame = None - self.variable = None - self.survey_scenario = None - - def set_survey_scenario(self, survey_scenario = None): - assert survey_scenario is not None - self.survey_scenario = survey_scenario - self.variables = self.survey_scenario.simulation.tax_benefit_system.variables - self.simulation = self.survey_scenario.simulation - assert survey_scenario.simulation is not None, "The simulation attibute of survey_scenario is None" - - def set_variable(self, variable): - if isinstance(variable, list): - self.variable = variable[0] - else: - self.variable = variable - - def show_aggregates(self): - from openfisca_france_data.erf.aggregates import build_erf_aggregates - - assert self.survey_scenario is not None, 'simulation attribute is None' - assert self.variable is not None, 'variable attribute is None' - variable = self.variable - openfisca_aggregates = Aggregates() - openfisca_aggregates.set_survey_scenario(self.survey_scenario) - openfisca_aggregates.compute() - - variables = self.variables - temp = (build_erf_aggregates(variables=[variable], year= self.survey_scenario.year)) - selection = openfisca_aggregates.aggr_frame["Mesure"] == variables[variable].label - print(openfisca_aggregates.aggr_frame[selection]) - print(temp) - # TODO: clean this - return - - def extract(self, data_frame, entities = "men"): - variables = self.variables - filtered_data_frame_columns = list(set(variables.keys()).intersection(set(data_frame.columns))) - extracted_columns = [column_name for column_name in filtered_data_frame_columns - if variables[column_name].entity in entities] - extracted_columns = list(set(extracted_columns).union(set(['idmen']))) - return data_frame[extracted_columns].copy() - - def get_all_parameters(self, column_list): - global x - print([column.name for column in column_list]) - x = x + 1 - if x == 20: - boum - variables = self.variables - tax_benefit_system = self.survey_scenario.simulation.tax_benefit_system - - extractor = input_variables_extractors.setup(tax_benefit_system) - - if len(column_list) == 0: - return [] - else: - column_name = column_list[0].name - print(column_name) - if extractor.get_input_variables(variables[column_name]) is None: - return column_list - else: - first_column = [column_list[0]] - input_columns = self.get_all_parameters([ - variables[clean(parameter)] - for parameter in list(extractor.get_input_variables(variables[column_name])) - ]) - other_columns = list( - set(self.get_all_parameters(column_list[1:])) - set(first_column + input_columns) - ) - print('input_variables: ', [column.name for column in input_columns]) - - print('new_variables: ', [column.name for column in other_columns]) - - new_column_list = first_column + input_columns + other_columns - print('final list: ', [column.name for column in new_column_list]) - return new_column_list - - def build_columns_to_fetch(self): - variables = self.variables -# parameters_column = self.get_all_parameters([variables.get(x) for x in [self.variable]]) -# parameters = [x.name for x in parameters_column] - parameters = [self.variable] - # We want to get all parameters and consumers that we're going to encounter -# consumers = [] -# for variable in [self.variable]: -# column = variables.get(variable) -# consumers = list(set(consumers).union(set(column.consumers))) -# column_names = list(set(parameters).union(set(consumers))) - - # self.columns_to_fetch = column_names - # self.variable_consumers = list(set(consumers)) - self.variable_parameters = list(set(parameters)) - self.columns_to_fetch = list(set(parameters)) - - def build_openfisca_data_frames(self): - column_names = self.columns_to_fetch - for column in column_names: - assert column in survey_scenario.tax_benefit_system.variables.keys() - data_frame_by_entity_key_plural = survey_scenario.create_data_frame_by_entity( - variables = column_names + ['idmen_original'], - indices = True, - roles = True, - ) - self.data_frame_by_entity_key_plural = data_frame_by_entity_key_plural - - projected = self.project_on(data_frame_by_entity_key_plural = data_frame_by_entity_key_plural) - idmen_original_by_idmen = dict( - zip( - data_frame_by_entity_key_plural['menages'].index.values, - data_frame_by_entity_key_plural['menages']["idmen_original"].values - ) - ) - self.idmen_original_by_idmen = idmen_original_by_idmen - - idmen_by_idmen_original = dict( - zip( - data_frame_by_entity_key_plural['menages']["idmen_original"].values, - data_frame_by_entity_key_plural['menages'].index.values, - ) - ) - self.idmen_by_idmen_original = idmen_by_idmen_original - - data_frame_by_entity_key_plural['menages'] = projected.rename( - columns = {"idmen_original": "idmen"}) - data_frame_by_entity_key_plural['individus'].replace( - {'idmen': idmen_original_by_idmen}, inplace = True) - self.data_frame_by_entity_key_plural = data_frame_by_entity_key_plural - - def project_on(self, receiving_entity_key_plural = 'menages', data_frame_by_entity_key_plural = None): - tax_benefit_system = self.survey_scenario.tax_benefit_system - assert data_frame_by_entity_key_plural is not None - assert receiving_entity_key_plural is not tax_benefit_system.person_key_plural - - entity_data_frame = data_frame_by_entity_key_plural[receiving_entity_key_plural] - person_data_frame = data_frame_by_entity_key_plural[tax_benefit_system.person_key_plural] - - entity_keys_plural = list( - set(tax_benefit_system.entity_class_by_key_plural.keys()).difference(set( - [tax_benefit_system.person_key_plural, receiving_entity_key_plural] - )) - ) - - for entity_key_plural in entity_keys_plural: - entity = tax_benefit_system.entity_class_by_key_plural[entity_key_plural] - # Getting only heads of other entities prenent in the projected on entity - boolean_index = person_data_frame[entity.role_for_person_variable_name] == 0 # Heads - index_entity = person_data_frame.loc[boolean_index, entity.index_for_person_variable_name].values # Ent. - for column_name, column_series in self.data_frame_by_entity_key_plural[entity_key_plural].items(): - person_data_frame.loc[boolean_index, column_name] \ - = column_series.iloc[index_entity].values - person_data_frame[column_name].fillna(0) - - receiving_entity = tax_benefit_system.entity_class_by_key_plural[receiving_entity_key_plural] - grouped_data_frame = person_data_frame.groupby(by = receiving_entity.index_for_person_variable_name).agg(sum) - grouped_data_frame.drop(receiving_entity.role_for_person_variable_name, axis = 1, inplace = True) - data_frame = concat([entity_data_frame, grouped_data_frame], axis = 1) - - assert data_frame.notnull().all().all() - return data_frame - - def build_erf_data_frames(self): - # TODO: remove this - self.columns_to_fetch = ['af'] - variables = self.columns_to_fetch - erf_survey_collection = SurveyCollection.load( - collection = "erfs", config_files_directory = config_files_directory) - erf_survey = erf_survey_collection.get_survey("erfs_{}".format(year)) - year_specific_by_generic = year_specific_by_generic_data_frame_name(year) - generic_by_year_specific = dict(zip(year_specific_by_generic.values(), year_specific_by_generic.keys())) - - erf_variables = list(set(variables + ["ident", "wprm", "quelfic", "noi"])) - of2erf = get_of2erf() - for index, variable in enumerate(erf_variables): - if variable in of2erf: - erf_variables[index] = of2erf[variable] - data_frame_by_table = dict(eec_indivi = None, erf_indivi = None, erf_menage = None) - erf_variables_by_generic_table = dict(eec_indivi = [], erf_indivi = [], erf_menage = []) - - year_specific_tables_by_erf_variable = dict( - [ - ( - erf_variable, - set( - erf_survey.find_tables(variable = erf_variable) - ).intersection( - set([year_specific_by_generic[key] for key in erf_variables_by_generic_table.keys()]) - ) - ) for erf_variable in erf_variables - ] - ) - for variable, year_specific_tables in year_specific_tables_by_erf_variable.items(): - if len(year_specific_tables) < 1: - log.info("No tables are present for variable {}".format(variable)) - continue - else: - log.info("Variable {} is present in multiple tables : {}".format(variable, year_specific_tables)) - for table in year_specific_tables: - log.info("Variable {} is retrieved from table {}".format(variable, table)) - erf_variables_by_generic_table[generic_by_year_specific[table]].append(variable) - - erf2of = get_erf2of() - - for table, erf_variables in erf_variables_by_generic_table.items(): - if erf_variables: - data_frame_by_table[table] = erf_survey.get_values( - variables = erf_variables, table = year_specific_by_generic[table] - ) - data_frame_by_table[table].rename(columns = erf2of, inplace = True) - data_frame_by_table[table].rename(columns = {'ident': 'idmen'}, inplace = True) - - assert not data_frame_by_table["erf_menage"].duplicated().any(), "Duplicated idmen in erf_menage" - self.erf_data_frame_by_entity_key_plural = dict( - menages = data_frame_by_table["erf_menage"], - individus = data_frame_by_table["erf_indivi"].merge(data_frame_by_table["eec_indivi"]) - ) - # TODO: fichier foyer - - def get_major_differences(self): - variable = self.variable - - of_menages_data_frame = self.data_frame_by_entity_key_plural['menages'] - erf_menages_data_frame = self.erf_data_frame_by_entity_key_plural['menages'] - - merged_menage_data_frame = merge( - erf_menages_data_frame[[variable, 'idmen']], - of_menages_data_frame[[variable, 'idmen']], - on = 'idmen', - how = 'inner', - suffixes = ('_erf', '_of') - ) - - log.info('Length of merged_menage_data_frame is {}'.format(len(merged_menage_data_frame))) - merged_menage_data_frame.set_index('idmen', drop = False, inplace = True) - table = merged_menage_data_frame[ - numpy.logical_and( - merged_menage_data_frame[variable + '_erf'] != 0, - merged_menage_data_frame[variable + '_of'] != 0 - ) - ] - table[variable + "_rel_diff"] = (table[variable + '_of'] - table[variable + '_erf']) \ - / table[variable + '_erf'] # Difference relative - log.info( - "Minimum difference between the two tables for {} is {}".format( - variable, str(table[variable + "_rel_diff"].min()) - ) - ) - log.info( - "Maximum difference between the two tables for {} is {}".format( - variable, str(table[variable + "_rel_diff"].max()) - ) - ) - table[variable + '_ratio'] = ( - table[variable + '_of'] / table[variable + '_erf'] - ) - log.info(table[variable + "_rel_diff"].describe()) - try: - assert len(table[variable + "_rel_diff"]) == len(table['wprm_of']), "PINAGS" - dec, values = mwp( - table[variable + "_rel_diff"], - numpy.arange(1, 11), table['wprm_of'], - 2, - return_quantiles = True - ) - log.info(sorted(values)) - dec, values = mwp( - table[variable + "_rel_diff"], - numpy.arange(1, 101), - table['wprm_erf'], - 2, - return_quantiles = True - ) - log.info(sorted(values)[90:]) - del dec, values - except Exception: - log.info('Weighted percentile method did not work for {}'.format(variable + "_rel_diff")) - pass - table.sort(columns = variable + "_rel_diff", ascending = False, inplace = True) - - print(table[:10].to_string()) - return table - - def describe_discrepancies(self, fov = 10, consumers = False, parameters = True, descending = True, to_men = False): - variable = self.variable - major_differences_data_frame = self.get_major_differences() - major_differences_data_frame.sort( - columns = self.variable + "_rel_diff", - ascending = not descending, - inplace = True - ) - debug_data_frame = major_differences_data_frame[0:fov].copy() - del major_differences_data_frame - - of_menages_data_frame = self.data_frame_by_entity_key_plural['menages'] - of_individus_data_frame = self.data_frame_by_entity_key_plural['individus'] - erf_individus_data_frame = self.erf_data_frame_by_entity_key_plural['individus'] - erf_menages_data_frame = self.erf_data_frame_by_entity_key_plural['menages'] - return debug_data_frame - - kept_columns = set() - if parameters: - kept_columns.update(set(self.variable_parameters)) - if consumers: - kept_columns.update(set(self.variable_consumers)) - kept_columns = list(kept_columns) - kept_columns = list(set(kept_columns).union( - set(['idmen', 'idfam', 'idfoy', 'quimen', 'quifam', 'quifoy'] + list(major_differences_data_frame.columns))) - ) - - if to_men: - entities_ind = ['ind'] - entities_men = ['men', 'fam', 'foy'] - else: - entities_ind = ['ind', 'fam', 'foy'] - entities_men = ['men'] - - debug_data_frame = debug_data_frame.merge( - self.extract(of_menages_data_frame, entities = entities_men), - how = 'inner', - on = 'idmen', - ) - - print(debug_data_frame.to_string()) - - debug_data_frame = debug_data_frame.merge( - self.extract(of_individus_data_frame, entities = entities_ind), - how = 'inner', - on = 'idmen', - ) - - debug_data_frame = debug_data_frame.merge( - erf_individus_data_frame, - how = 'inner', - on = 'idmen', - ) - - suffixes = ["_erf", "_of", "_rel_diff", "_ratio"] - reordered_columns = [variable + suffixe for suffixe in suffixes] \ - + ["idmen", "quimen", "idfam", "quifam", "idfoy", "quifoy"] - reordered_columns = reordered_columns + list(set(kept_columns) - set(reordered_columns)) - debug_data_frame = debug_data_frame[reordered_columns].copy() - return debug_data_frame - - def generate_test_case(self): - entity_class_by_key_plural = self.survey_scenario.tax_benefit_system.entity_class_by_key_plural - menages_entity = entity_class_by_key_plural['menages'] - idmen_by_idmen_original = self.idmen_by_idmen_original - idmen_original = self.describe_discrepancies(descending = False)[ - menages_entity.index_for_person_variable_name].iloc[0] - idmen = idmen_by_idmen_original[idmen_original] - input_data_frame = self.survey_scenario.input_data_frame - individus_index = input_data_frame.index[ - input_data_frame[menages_entity.index_for_person_variable_name] == idmen] - index_by_entity = { - entity_class_by_key_plural['individus']: individus_index, - } - for entity in list(entity_class_by_key_plural.values()): - if entity.key_plural != 'individus': - index_by_entity[entity] = input_data_frame.loc[ - individus_index, entity.index_for_person_variable_name].unique() - - extracted_indices = individus_index - for entity, entity_index in index_by_entity.items(): - if entity.key_plural in ['menages', 'individus']: - continue - extracted_indices = extracted_indices + \ - input_data_frame.index[input_data_frame[entity.index_for_person_variable_name].isin(entity_index)] - - extracted_input_data_frame = input_data_frame.loc[extracted_indices] - return extracted_input_data_frame - - -if __name__ == '__main__': - import sys - from openfisca_plugin_aggregates.tests.test_aggregates import create_survey_scenario - logging.basicConfig(level = logging.INFO, stream = sys.stdout) - restart = True - if restart: - year = 2009 - survey_scenario = create_survey_scenario(year) - survey_scenario.simulation = survey_scenario.new_simulation() - - debugger = Debugger() - debugger.set_survey_scenario(survey_scenario = survey_scenario) - debugger.set_variable('af') - debugger.build_columns_to_fetch() - debugger.build_openfisca_data_frames() - debugger.build_erf_data_frames() - - # df_menage = debugger.data_frame_by_entity_key_plural['menages'] - # df_famille = debugger.data_frame_by_entity_key_plural['familles'] - # df_individus = debugger.data_frame_by_entity_key_plural['individus'] - - # df = debugger.get_major_differences() - - # debugger.show_aggregates() - df = debugger.describe_discrepancies(descending = False) - df = debugger.generate_test_case() - - boum - entity_class_by_key_plural = debugger.survey_scenario.tax_benefit_system.entity_class_by_key_plural - menages_entity = entity_class_by_key_plural['menages'] - - idmen = debugger.describe_discrepancies(descending = False)[menages_entity.index_for_person_variable_name].iloc[0] - - input_data_frame = debugger.survey_scenario.input_data_frame diff --git a/openfisca_france_data/erfs_fpr/comparison.py b/openfisca_france_data/erfs_fpr/comparison.py index f8e0b170..fc4bfb45 100644 --- a/openfisca_france_data/erfs_fpr/comparison.py +++ b/openfisca_france_data/erfs_fpr/comparison.py @@ -24,6 +24,9 @@ "ric_i": "ric", "rnc_i": "rnc", "salaires_i": "salaire_imposable", + "rev_fonciers_bruts": "revenu_categoriel_foncier_menage", + "rev_valeurs_mobilieres_bruts": "revenus_capitaux_prelevement_forfaitaire_unique_ir_menage", + "rev_financier_prelev_lib_imputes": "rev_financier_prelev_lib_imputes_menage", } @@ -40,6 +43,17 @@ class ErfsFprtoInputComparator(AbstractComparator): "salaire_imposable", ] + from openfisca_france_data.erfs_fpr.get_survey_scenario import menage_projected_variables + + target_menage_projected_variables = [ + f"{menage_projected_variable}_menage" + for menage_projected_variable + in menage_projected_variables + ] + + default_target_variables += target_menage_projected_variables + + def compute_test_dataframes(self): erfs_fpr_survey_collection = SurveyCollection.load(collection = "erfs_fpr") # infer names of the survey and data tables diff --git a/openfisca_france_data/erfs_fpr/get_survey_scenario.py b/openfisca_france_data/erfs_fpr/get_survey_scenario.py index 290d2f1a..4f8e065c 100644 --- a/openfisca_france_data/erfs_fpr/get_survey_scenario.py +++ b/openfisca_france_data/erfs_fpr/get_survey_scenario.py @@ -6,7 +6,7 @@ from openfisca_core.reforms import Reform # type: ignore from openfisca_core.taxbenefitsystems import TaxBenefitSystem # type: ignore -from openfisca_france.entities import Individu +from openfisca_france.entities import Individu, FoyerFiscal, Menage from openfisca_france_data.erfs_fpr.scenario import ErfsFprSurveyScenario from openfisca_france_data import france_data_tax_benefit_system @@ -27,6 +27,13 @@ ] +menage_projected_variables = [ + # "rev_financier_prelev_lib_imputes", + "revenu_categoriel_foncier", + "revenus_capitaux_prelevement_forfaitaire_unique_ir", + ] + + class erfs_fpr_plugin(Reform): name = "ERFS-FPR ids plugin" @@ -36,7 +43,7 @@ def apply(self): class_name = f"{variable}_annuel" label = f"{variable} sur l'année entière" - def formula_creator(variable): + def annual_formula_creator(variable): def formula(individu, period): result = individu(variable, period, options = [ADD]) return result @@ -46,16 +53,42 @@ def formula(individu, period): return formula variable_instance = type(class_name, (Variable,), dict( - value_type = int, + value_type = float, entity = self.variables[variable].entity, label = label, definition_period = YEAR, - formula = formula_creator(variable), + formula = annual_formula_creator(variable), )) self.add_variable(variable_instance) del variable_instance + for variable in menage_projected_variables: + class_name = f"{variable}_menage" + label = f"{variable} agrégée à l'échelle du ménage" + + def projection_formula_creator(variable): + def formula(menage, period): + result_i = menage.members.foyer_fiscal(variable, period, options = [ADD]) + result = menage.sum(result_i, role = FoyerFiscal.DECLARANT_PRINCIPAL) + return result + + formula.__name__ = 'formula' + + return formula + + variable_instance = type(class_name, (Variable,), dict( + value_type = float, + entity = Menage, + label = label, + definition_period = YEAR, + formula = projection_formula_creator(variable), + )) + + self.add_variable(variable_instance) + del variable_instance + + self.add_variable(idmen_original) self.add_variable(noindiv) @@ -111,6 +144,7 @@ def get_survey_scenario( # S'il n'y a pas de données, on sait où les trouver. if data is None: input_data_table_by_entity = dict( + foyer_fiscal = f"foyer_fiscal_{year}", individu = f"individu_{year}", menage = f"menage_{year}", ) @@ -120,7 +154,6 @@ def get_survey_scenario( data = dict( input_data_table_by_entity_by_period = input_data_table_by_entity_by_period, - # input_data_survey_prefix = "openfisca_erfs_fpr_data", survey = survey_name ) diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py b/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py index f2037474..6844c59f 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py @@ -18,7 +18,8 @@ # step_02_imputation_loyer as imputation_loyer, step_03_variables_individuelles as variables_individuelles, step_04_famille as famille, - step_05_final as final, + step_05_foyer as foyer, + step_06_final as final, ) log = logging.getLogger(__name__) @@ -48,7 +49,7 @@ def build(year: int, export_flattened_df_filepath: str = None) -> None: # - On merge les tables individus / menages # # Note : c'est ici où on objectivise les hypothèses, step 1 - log.info('\n [[[ Year {} - Step 1 / 5 ]]] \n'.format(year)) + log.info('\n [[[ Year {} - Step 1 / 6 ]]] \n'.format(year)) preprocessing.build_merged_dataframes(year = year) # Step 02 : Si on veut calculer les allocations logement, il faut faire le matching avec une autre enquête (ENL) @@ -57,10 +58,10 @@ def build(year: int, export_flattened_df_filepath: str = None) -> None: # stata_directory = openfisca_survey_collection.config.get('data', 'stata_directory') # stata_file = os.path.join(stata_directory, 'log_men_ERFS.dta') # imputation_loyer.merge_imputation_loyer(stata_file = stata_file, year = year) - log.info('\n [[[ Year {} - Step 2 / 5 SKIPPED ]]] \n'.format(year)) + log.info('\n [[[ Year {} - Step 2 / 6 SKIPPED ]]] \n'.format(year)) # Step 03 : on commence par les variables indivuelles - log.info('\n [[[ Year {} - Step 3 / 5 ]]] \n'.format(year)) + log.info('\n [[[ Year {} - Step 3 / 6 ]]] \n'.format(year)) variables_individuelles.build_variables_individuelles(year = year) # Step 04 : ici on va constituer foyer et famille à partir d'invididu et ménage @@ -69,15 +70,18 @@ def build(year: int, export_flattened_df_filepath: str = None) -> None: # - On va faire des suppositions pour faire les familles # - On va faire les foyers fiscaux à partir des familles # - On va faire de suppositions pour faire les foyers fiscaux - log.info('\n [[[ Year {} - Step 4 / 5 ]]] \n'.format(year)) + log.info('\n [[[ Year {} - Step 4 / 6 ]]] \n'.format(year)) famille.build_famille(year = year) - + + log.info('\n [[[ Year {} - Step 5 / 6 ]]] \n'.format(year)) + foyer.build_variables_foyers_fiscal(year = year) + # Affreux ! On injectait tout dans un même DataFrame !!! # C'est très moche ! # # On crée une df par entité par période. # Elles sont stockées dans un fichier h5 - log.info('\n [[[ Year {} - Step 5 / 5 ]]] \n'.format(year)) + log.info('\n [[[ Year {} - Step 6 / 6 ]]] \n'.format(year)) final.create_input_data_frame(year = year, export_flattened_df_filepath = export_flattened_df_filepath) diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py index 7b895658..294e3779 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py @@ -56,11 +56,11 @@ def build_variables_individuelles(temporary_store = None, year = None): # helpers def create_variables_individuelles(individus, year, survey_year = None, revenu_type = 'imposable'): - """Création des variables individuelles""" + """Création des variables individuelles.""" period = periods.period(year) tax_benefit_system = openfisca_france_tax_benefit_system - # variables démographiques + # variables démographiques create_ages(individus, year) create_date_naissance(individus, age_variable = None, annee_naissance_variable = 'naia', mois_naissance = 'naim', year = year) @@ -82,14 +82,15 @@ def create_variables_individuelles(individus, year, survey_year = None, revenu_t individus, period = period, revenu_type = revenu_type, - tax_benefit_system = tax_benefit_system + tax_benefit_system = tax_benefit_system, ) create_traitement_indiciaire_brut( - individus, - period = period, + individus, + period = period, revenu_type = revenu_type, - tax_benefit_system = tax_benefit_system) - + tax_benefit_system = tax_benefit_system, + ) + # Pour les cotisations patronales qui varient avec la taille de l'entreprise' create_effectif_entreprise(individus, period = period, survey_year = survey_year) @@ -900,7 +901,7 @@ def create_revenus_remplacement_bruts(individus): """ Reconstitution des variables de retraite et chomage brut à partir des variables nettes et des variables de csg et crds """ - + # revenu_brut = revenu_net + csg_deductible + csg_non_deductible_crds individus['chomage_brut'] = individus.chomage_imposable + individus.csgchod_i #+ individus. csg_nd_crds_cho_i individus['retraite_brute'] = individus.retraite_imposable + individus.csgrstd_i #+ individus.csg_nd_crds_ret_i diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_foyer.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_05_foyer.py new file mode 100644 index 00000000..4fc912ae --- /dev/null +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_05_foyer.py @@ -0,0 +1,61 @@ +import logging +import pandas as pd + + +from openfisca_survey_manager.temporary import temporary_store_decorator # type: ignore + + +log = logging.getLogger(__name__) + + +@temporary_store_decorator(file_name = 'erfs_fpr') +def build_variables_foyers_fiscal(temporary_store = None, year = None): + + assert temporary_store is not None + assert year is not None + + individus = temporary_store['individus_{}'.format(year)] + menages = temporary_store['menages_{}'.format(year)] + + individus['idfoy'] = individus['idfam'].copy() + individus['quifoy'] = individus['quifam'].copy() + + foyers_fiscaux = individus[['idfoy','ident',]].drop_duplicates() + foyers_fiscaux = pd.merge( + menages[[ + 'ident', + 'rev_financier_prelev_lib_imputes', + 'rev_fonciers_bruts', + 'rev_valeurs_mobilieres_bruts', + 'wprm', + ]], + foyers_fiscaux, + how = 'inner', + on = 'ident' + ) + # première version pour splitter les revenus du capital du ménage dans les foyers fiscaux + # on attribue l'ensemble des revenus du capital du ménage au foyer avec la personne ayant les plus hauts revenus + # procédure à améliorer + idfoy = (individus + .sort_values( + [ + 'ident', + 'salaire_de_base', + 'traitement_indiciaire_brut', + 'retraite_brute' + ], + ascending = False + ) + .groupby('ident') + .first() + .idfoy + ) + foyers_fiscaux['revenu_categoriel_foncier'] = foyers_fiscaux['rev_fonciers_bruts'] * foyers_fiscaux.idfoy.isin(idfoy) + foyers_fiscaux['revenus_capitaux_prelevement_forfaitaire_unique_ir'] = foyers_fiscaux['rev_valeurs_mobilieres_bruts'] * foyers_fiscaux.idfoy.isin(idfoy) + foyers_fiscaux['rev_financier_prelev_lib_imputes'] = foyers_fiscaux['rev_financier_prelev_lib_imputes'] * foyers_fiscaux.idfoy.isin(idfoy) + + #assert sum(menages.wprm * menages.rev_fonciers_bruts) == sum(foyers_fiscaux.rev_fonciers_bruts * foyers_fiscaux.wprm) + #assert sum(menages.wprm * menages.rev_valeurs_mobilieres_bruts) == sum(foyers_fiscaux.rev_financier_prelev_lib_imputes * foyers_fiscaux.wprm) + #assert sum(menages.wprm * menages.rev_financier_prelev_lib_imputes) == sum(foyers_fiscaux.rev_valeurs_mobilieres_bruts * foyers_fiscaux.wprm) + + temporary_store[f"foyers_fiscaux_{year}"] = foyers_fiscaux diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_06_final.py similarity index 84% rename from openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py rename to openfisca_france_data/erfs_fpr/input_data_builder/step_06_final.py index fccaa092..c95f79d8 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_06_final.py @@ -16,9 +16,10 @@ def create_input_data_frame(temporary_store = None, year = None, export_flattene individus = temporary_store['individus_{}'.format(year)] menages = temporary_store['menages_{}'.format(year)] + foyers_fiscaux = temporary_store['foyers_fiscaux_{}'.format(year)] # ici : variables à garder - variables = [ + var_individus = [ 'activite', 'age', 'categorie_salarie', @@ -39,13 +40,27 @@ def create_input_data_frame(temporary_store = None, year = None, export_flattene 'retraite_brute', 'ric', 'rnc', - 'statut_marital', - # 'salaire_imposable', 'salaire_de_base', - "traitement_indiciaire_brut", + 'statut_marital', "primes_fonction_publique", + "traitement_indiciaire_brut", + ] + var_foyers_fiscaux = [ + 'idfoy', + 'rev_financier_prelev_lib_imputes', + 'revenu_categoriel_foncier', + 'revenus_capitaux_prelevement_forfaitaire_unique_ir', ] + var_menages = [ + 'idmen', + 'loyer', + 'statut_occupation_logement', + 'taxe_habitation', + 'wprm', + 'zone_apl', + ] + # TODO: fix this simplistic inference individus.rename(columns = { 'ric_net': 'ric', @@ -56,7 +71,7 @@ def create_input_data_frame(temporary_store = None, year = None, export_flattene ) individus = create_ids_and_roles(individus) - individus = individus[variables].copy() + individus = individus[var_individus].copy() gc.collect() # This looks like it could have a sizeable impact @@ -71,7 +86,7 @@ def create_input_data_frame(temporary_store = None, year = None, export_flattene menages = extract_menages_variables(menages) - individus = create_collectives_foyer_variables(individus, menages) + # individus = create_collectives_foyer_variables(individus, menages) idmens = individus.idmen.unique() menages = menages.loc[ @@ -92,11 +107,20 @@ def create_input_data_frame(temporary_store = None, year = None, export_flattene menages = menages.rename(columns = {'idmen':'idmen_original'}) unique_idmen = individus[['idmen','idmen_original']].drop_duplicates() assert len(unique_idmen) == len(menages), "Number of idmen should be the same individus and menages tables." - menages = menages.merge( - unique_idmen, - how = 'inner', - on = 'idmen_original' - ) + + menages = menages.merge(unique_idmen, + how = 'inner', + on = 'idmen_original') + + foyers_fiscaux = foyers_fiscaux.rename(columns = {'idfoy':'idfoy_original'}) + unique_idfoy = individus[['idfoy','idfoy_original']].drop_duplicates() + assert len(unique_idmen) == len(menages), "Number of idfoy should be the same individus and foyers tables." + + foyers_fiscaux = foyers_fiscaux.merge(unique_idfoy, + how = 'inner', + on = 'idfoy_original') + + foyers_fiscaux = foyers_fiscaux[var_foyers_fiscaux] if export_flattened_df_filepath: supermerge = individus.merge( @@ -117,6 +141,18 @@ def create_input_data_frame(temporary_store = None, year = None, export_flattene collection = "openfisca_erfs_fpr", survey_name = survey_name, ) + + foyers_fiscaux = foyers_fiscaux.sort_values(by = ['idfoy']) + log.debug(f"Saving entity 'foyers fiscaux' in collection 'openfisca_erfs_fpr' and survey name '{survey_name}' with set_table_in_survey") + set_table_in_survey( + foyers_fiscaux, + entity = "foyer_fiscal", + period = year, + collection = "openfisca_erfs_fpr", + survey_name = survey_name, + ) + log.debug("End of create_input_data_frame") + menages = menages.sort_values(by = ['idmen']) log.debug(f"Saving entity 'menage' in collection 'openfisca_erfs_fpr' and survey name '{survey_name}' with set_table_in_survey") set_table_in_survey( @@ -184,7 +220,7 @@ def create_collectives_foyer_variables(individus, menages): assert set(foyers_revenus_fonciers.columns) == set(['idfoy', 'rev_fonciers_bruts', 'quifoy']) individus = individus.merge(foyers_revenus_fonciers, how = 'outer', on = ['idfoy', 'quifoy']) assert set(idmens) == set(individus .query('(rev_fonciers_bruts > 0)')['idmen'].tolist()) - individus.rename(columns = {'rev_fonciers_bruts': 'f4ba'}, inplace = True) + individus.rename(columns = {'rev_fonciers_bruts': 'revenu_categoriel_foncier'}, inplace = True) return individus @@ -229,16 +265,13 @@ def extract_menages_variables_from_store(temporary_store = None, year = None): def extract_menages_variables(menages): - variables = ['ident', 'wprm', 'taxe_habitation', 'rev_fonciers_bruts'] + variables = ['ident', 'wprm', 'taxe_habitation'] external_variables = ['loyer', 'zone_apl', 'statut_occupation_logement'] for external_variable in external_variables: if external_variable in menages.columns: log.debug("Found {} in menages table: we keep it".format(external_variable)) variables.append(external_variable) - # TODO: 2007-2010 ont la variable rev_fonciers et non pas rev_fonciers_bruts. Est-ce la même? - menages = menages.rename(columns={'rev_fonciers': 'rev_fonciers_bruts'}) menages = menages[variables].copy() - menages.taxe_habitation = - menages.taxe_habitation # taxes should be negative menages.rename(columns = dict(ident = 'idmen'), inplace = True) return menages diff --git a/openfisca_france_data/erfs_fpr/scenario.py b/openfisca_france_data/erfs_fpr/scenario.py index dca44945..7f81adea 100644 --- a/openfisca_france_data/erfs_fpr/scenario.py +++ b/openfisca_france_data/erfs_fpr/scenario.py @@ -25,6 +25,9 @@ class ErfsFprSurveyScenario(AbstractErfsSurveyScenario): "rag", "retraite_brute", "retraite_imposable", + # "rev_financier_prelev_lib_imputes", + "revenu_categoriel_foncier", + "revenus_capitaux_prelevement_forfaitaire_unique_ir", "ric", "rnc", "salaire_de_base", From 54b5e9898ac1b81870393ff18a41e2d07226881c Mon Sep 17 00:00:00 2001 From: Mahdi Ben Jelloul Date: Fri, 31 Mar 2023 09:54:15 +0000 Subject: [PATCH 04/41] Ajout variable apl --- openfisca_france_data/comparator.py | 6 ++++-- openfisca_france_data/erfs_fpr/comparison.py | 7 ++++++- .../erfs_fpr/input_data_builder/__init__.py | 6 ++++-- .../input_data_builder/step_02_menage.py | 18 ++++++++++++++++++ .../step_03_variables_individuelles.py | 5 +++++ .../input_data_builder/step_05_foyer.py | 4 ---- .../input_data_builder/step_06_final.py | 6 +++--- 7 files changed, 40 insertions(+), 12 deletions(-) create mode 100644 openfisca_france_data/erfs_fpr/input_data_builder/step_02_menage.py diff --git a/openfisca_france_data/comparator.py b/openfisca_france_data/comparator.py index 39c27541..d788bfe2 100644 --- a/openfisca_france_data/comparator.py +++ b/openfisca_france_data/comparator.py @@ -159,11 +159,13 @@ def create_variable_distribution_figures(variable, result, bins = None, figures_ unique_values_count = melted["value"].nunique() - bins == unique_values_count if unique_values_count < bins else bins + bins = unique_values_count if unique_values_count < bins else bins print(f"create_variable_distribution_figures (total): variable = {variable}, bins = {bins}") melted["value"] = melted["value"].clip(1, melted["value"].max()) + log_scale = bins > 10 + sns_plot = sns.histplot( data = melted, # palette = "crest", @@ -175,7 +177,7 @@ def create_variable_distribution_figures(variable, result, bins = None, figures_ hue = "variable", linewidth = 0, x = "value", - log_scale = True, + log_scale = log_scale, ) sns_plot.annotate( diff --git a/openfisca_france_data/erfs_fpr/comparison.py b/openfisca_france_data/erfs_fpr/comparison.py index fc4bfb45..cfb4e87b 100644 --- a/openfisca_france_data/erfs_fpr/comparison.py +++ b/openfisca_france_data/erfs_fpr/comparison.py @@ -24,6 +24,7 @@ "ric_i": "ric", "rnc_i": "rnc", "salaires_i": "salaire_imposable", + "logt": "statut_occupation_logement", "rev_fonciers_bruts": "revenu_categoriel_foncier_menage", "rev_valeurs_mobilieres_bruts": "revenus_capitaux_prelevement_forfaitaire_unique_ir_menage", "rev_financier_prelev_lib_imputes": "rev_financier_prelev_lib_imputes_menage", @@ -35,12 +36,13 @@ class ErfsFprtoInputComparator(AbstractComparator): period = None default_target_variables = [ "chomage_imposable", + "loyer", # "rag", "retraite_imposable", # "ric", # "rnc", - # "f4ba", "salaire_imposable", + "statut_occupation_logement", ] from openfisca_france_data.erfs_fpr.get_survey_scenario import menage_projected_variables @@ -78,6 +80,9 @@ def compute_test_dataframes(self): "individu": openfisca_individu, "menage": openfisca_menage, } + + fpr_menage.loyer = 12 * fpr_menage.loyer + target_dataframe_by_entity = { "individu": fpr_individu.rename(columns = openfisca_by_erfs_fpr_variables), "menage": fpr_menage.rename(columns = openfisca_by_erfs_fpr_variables), diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py b/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py index 6844c59f..5dc8df4b 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py @@ -16,6 +16,7 @@ from openfisca_france_data.erfs_fpr.input_data_builder import ( step_01_preprocessing as preprocessing, # step_02_imputation_loyer as imputation_loyer, + step_02_menage as menage, step_03_variables_individuelles as variables_individuelles, step_04_famille as famille, step_05_foyer as foyer, @@ -59,6 +60,7 @@ def build(year: int, export_flattened_df_filepath: str = None) -> None: # stata_file = os.path.join(stata_directory, 'log_men_ERFS.dta') # imputation_loyer.merge_imputation_loyer(stata_file = stata_file, year = year) log.info('\n [[[ Year {} - Step 2 / 6 SKIPPED ]]] \n'.format(year)) + menage.build_variables_menage(year = year) # Step 03 : on commence par les variables indivuelles log.info('\n [[[ Year {} - Step 3 / 6 ]]] \n'.format(year)) @@ -72,10 +74,10 @@ def build(year: int, export_flattened_df_filepath: str = None) -> None: # - On va faire de suppositions pour faire les foyers fiscaux log.info('\n [[[ Year {} - Step 4 / 6 ]]] \n'.format(year)) famille.build_famille(year = year) - + log.info('\n [[[ Year {} - Step 5 / 6 ]]] \n'.format(year)) foyer.build_variables_foyers_fiscal(year = year) - + # Affreux ! On injectait tout dans un même DataFrame !!! # C'est très moche ! # diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_02_menage.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_02_menage.py new file mode 100644 index 00000000..538f1fc3 --- /dev/null +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_02_menage.py @@ -0,0 +1,18 @@ +import logging +import pandas as pd + + +from openfisca_survey_manager.temporary import temporary_store_decorator # type: ignore + +log = logging.getLogger(__name__) + + +@temporary_store_decorator(file_name = 'erfs_fpr') +def build_variables_menage(temporary_store = None, year = None): + if year >= 2018: + menages = temporary_store['menages_{}'.format(year)] + menages['zone_apl'] = 2 + # pour l'instant on met tout le monde à 2 mais à améliorer, peut être en fonction de la taille de l'aire urbaine ? + menages['statut_occupation_logement'] = menages['so'].copy() + menages.loc[(menages.statut_occupation_logement == 7),'statut_occupation_logement'] = 2 + temporary_store['menages_{}'.format(year)] = menages diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py index 294e3779..92364169 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py @@ -90,6 +90,11 @@ def create_variables_individuelles(individus, year, survey_year = None, revenu_t revenu_type = revenu_type, tax_benefit_system = tax_benefit_system, ) + create_traitement_indiciaire_brut( + individus, + period = period, + revenu_type = revenu_type, + tax_benefit_system = tax_benefit_system) # Pour les cotisations patronales qui varient avec la taille de l'entreprise' create_effectif_entreprise(individus, period = period, survey_year = survey_year) diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_foyer.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_05_foyer.py index 4fc912ae..21683d06 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_foyer.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_05_foyer.py @@ -54,8 +54,4 @@ def build_variables_foyers_fiscal(temporary_store = None, year = None): foyers_fiscaux['revenus_capitaux_prelevement_forfaitaire_unique_ir'] = foyers_fiscaux['rev_valeurs_mobilieres_bruts'] * foyers_fiscaux.idfoy.isin(idfoy) foyers_fiscaux['rev_financier_prelev_lib_imputes'] = foyers_fiscaux['rev_financier_prelev_lib_imputes'] * foyers_fiscaux.idfoy.isin(idfoy) - #assert sum(menages.wprm * menages.rev_fonciers_bruts) == sum(foyers_fiscaux.rev_fonciers_bruts * foyers_fiscaux.wprm) - #assert sum(menages.wprm * menages.rev_valeurs_mobilieres_bruts) == sum(foyers_fiscaux.rev_financier_prelev_lib_imputes * foyers_fiscaux.wprm) - #assert sum(menages.wprm * menages.rev_financier_prelev_lib_imputes) == sum(foyers_fiscaux.rev_valeurs_mobilieres_bruts * foyers_fiscaux.wprm) - temporary_store[f"foyers_fiscaux_{year}"] = foyers_fiscaux diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_06_final.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_06_final.py index c95f79d8..582e296f 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_06_final.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_06_final.py @@ -80,9 +80,9 @@ def create_input_data_frame(temporary_store = None, year = None, export_flattene menages[k] = 0 # Again artificially putting missing variables in their default state - menages["loyer"] = 0 - menages["zone_apl"] = 2 - menages["statut_occupation_logement"] = 0 + #menages["loyer"] = 0 + #menages["zone_apl"] = 2 + #menages["statut_occupation_logement"] = 0 menages = extract_menages_variables(menages) From 7ef0d43e9b6839d1313f94e9147bb8df0a494246 Mon Sep 17 00:00:00 2001 From: benoit-cty <6603048+benoit-cty@users.noreply.github.com> Date: Tue, 28 Mar 2023 11:19:32 +0200 Subject: [PATCH 05/41] Add check-version-and-changelog --- .gitlab-ci.yml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 16d1e951..e87029b8 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -139,6 +139,26 @@ diagnostics: tags: - openfisca +check-version-and-changelog: + stage: diagnostics + before_script: + - '' + needs: + - input_data-2019 + script: + - .github/is-version-number-acceptable.sh + +check-for-functional-changes: + stage: diagnostics + needs: + - check-version-and-changelog + before_script: + - '' + script: + - if `.github/has-functional-changes.sh` ; then echo "OK to build package" ; fi + when: + - master + run_on_all_years: stage: run_on_all_years From badfad8c0293536554ec7cebc88c948297dfeb9b Mon Sep 17 00:00:00 2001 From: benoit-cty <6603048+benoit-cty@users.noreply.github.com> Date: Tue, 28 Mar 2023 11:22:18 +0200 Subject: [PATCH 06/41] Add script for GitLab CI wip wip : adapt script wip wip wip wip wip wip Add emoji --- .github/has-functional-changes.sh | 2 +- .gitlab-ci.yml | 30 ++++++++------- ci-runner/has-functional-changes.sh | 18 +++++++++ ci-runner/is-version-number-acceptable.sh | 47 +++++++++++++++++++++++ 4 files changed, 82 insertions(+), 15 deletions(-) create mode 100755 ci-runner/has-functional-changes.sh create mode 100755 ci-runner/is-version-number-acceptable.sh diff --git a/.github/has-functional-changes.sh b/.github/has-functional-changes.sh index 48f97805..b010f0d1 100755 --- a/.github/has-functional-changes.sh +++ b/.github/has-functional-changes.sh @@ -3,7 +3,7 @@ IGNORE_DIFF_ON="README.md CONTRIBUTING.md Makefile .gitignore .github/*" last_tagged_commit=`git describe --tags --abbrev=0 --first-parent` # --first-parent ensures we don't follow tags not published in master through an unlikely intermediary merge commit - +echo ".github/has-functional-changes.sh : last_tagged_commit=$last_tagged_commit" if git diff-index --name-only --exit-code $last_tagged_commit -- . `echo " $IGNORE_DIFF_ON" | sed 's/ / :(exclude)/g'` # Check if any file that has not be listed in IGNORE_DIFF_ON has changed since the last tag was published. then echo "No functional changes detected." diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e87029b8..b376ac35 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -7,6 +7,7 @@ variables: # OUT_FOLDER: "$CI_COMMIT_REF_NAME-$CI_COMMIT_SHORT_SHA" # For branch-commit_id OUT_FOLDER: "$CI_COMMIT_REF_NAME" # For just branch ROOT_FOLDER: "/mnt/data-out/openfisca-france-data" +# GIT_DEPTH: 1000 # To be able to get last tag (default 50) cache: paths: @@ -140,24 +141,25 @@ diagnostics: - openfisca check-version-and-changelog: - stage: diagnostics + # stage: diagnostics TODO: Put it back + stage: test before_script: - '' needs: - - input_data-2019 - script: - - .github/is-version-number-acceptable.sh - -check-for-functional-changes: - stage: diagnostics - needs: - - check-version-and-changelog - before_script: - - '' + # - input_data-2019 + - test script: - - if `.github/has-functional-changes.sh` ; then echo "OK to build package" ; fi - when: - - master + - ci-runner/is-version-number-acceptable.sh + +# check-for-functional-changes: +# stage: diagnostics +# needs: +# - check-version-and-changelog +# before_script: +# - '' +# script: +# - if `.github/has-functional-changes.sh` ; then echo "OK to build package" ; fi +# when: master run_on_all_years: diff --git a/ci-runner/has-functional-changes.sh b/ci-runner/has-functional-changes.sh new file mode 100755 index 00000000..f53acff4 --- /dev/null +++ b/ci-runner/has-functional-changes.sh @@ -0,0 +1,18 @@ +#! /usr/bin/env bash + +IGNORE_DIFF_ON="README.md CONTRIBUTING.md Makefile .gitignore .github/*" + +# Fetch all tags +git fetch --tags + +last_tagged_commit=`git tag --list | sort -V | grep -v ipp | tail -1` +echo "ci-runner/has-functional-changes.sh : avec git tag --list last_tagged_commit=$last_tagged_commit" + +# Check if any file that has not be listed in IGNORE_DIFF_ON has changed since the last tag was published. +if git diff-index --name-only --exit-code $last_tagged_commit -- . `echo " $IGNORE_DIFF_ON" | sed 's/ / :(exclude)/g'` +then + echo "No functional changes detected." + exit 1 +else + echo "The functional files above were changed." +fi diff --git a/ci-runner/is-version-number-acceptable.sh b/ci-runner/is-version-number-acceptable.sh new file mode 100755 index 00000000..ad7850a2 --- /dev/null +++ b/ci-runner/is-version-number-acceptable.sh @@ -0,0 +1,47 @@ +#! /usr/bin/env bash + +if [[ ${CI_COMMIT_REF_NAME} == master ]] +then + echo "No need for a version check on master.👍" + exit 0 +else + echo "Not on master." +fi + +if ! $(dirname "$BASH_SOURCE")/has-functional-changes.sh +then + echo "No need for a version update.👍" + exit 0 +else + echo "Need for a version update." +fi + +current_version=`python setup.py --version` + +if git rev-parse --verify --quiet $current_version +then + echo "Version $current_version already exists in commit:" + git --no-pager log -1 $current_version + echo + echo "Update the version number in setup.py before merging this branch into master.😒" + echo "Look at the CONTRIBUTING.md file to learn how the version number should be updated." + exit 1 +else + echo "Version $current_version don't exists in commit history 👍" +fi + +if ! $(dirname "$BASH_SOURCE")/has-functional-changes.sh | grep --quiet CHANGELOG.md +then + echo "CHANGELOG.md has not been modified, while functional changes were made. 😒" + echo "Explain what you changed before merging this branch into master." + echo "Look at the CONTRIBUTING.md file to learn how to write the changelog." + exit 2 +else + if ! grep --quiet $current_version CHANGELOG.md + then + echo "CHANGELOG.md has been modified. BUT $current_version don't exists in it. 😒" + exit 3 + else + echo "CHANGELOG.md has been modified and $current_version exists in it. 👍" + fi +fi From fb08269f51477283b97b3efe7a5d0117ed3497ce Mon Sep 17 00:00:00 2001 From: benoit-cty <4-benoit-cty@users.noreply.git.leximpact.dev> Date: Fri, 31 Mar 2023 10:32:49 +0200 Subject: [PATCH 07/41] Change path --- .gitlab-ci.yml | 12 ++++++------ {ci-runner => .gitlab-ci}/README.md | 6 +++--- .../all_years_build_and_aggregates.yml | 6 +++--- {ci-runner => .gitlab-ci}/build_ci.py | 8 ++++---- .../empty_openfisca_erfs_fpr.json | 0 {ci-runner => .gitlab-ci}/has-functional-changes.sh | 2 +- .../is-version-number-acceptable.sh | 0 .../openfisca_france_data_config.ini | 0 .../openfisca_survey_manager_config.ini | 2 +- .../openfisca_survey_manager_raw_data.ini | 2 +- 10 files changed, 19 insertions(+), 19 deletions(-) rename {ci-runner => .gitlab-ci}/README.md (93%) rename {gitlab_ci => .gitlab-ci}/all_years_build_and_aggregates.yml (99%) rename {ci-runner => .gitlab-ci}/build_ci.py (95%) rename {ci-runner => .gitlab-ci}/empty_openfisca_erfs_fpr.json (100%) rename {ci-runner => .gitlab-ci}/has-functional-changes.sh (83%) rename {ci-runner => .gitlab-ci}/is-version-number-acceptable.sh (100%) rename {ci-runner => .gitlab-ci}/openfisca_france_data_config.ini (100%) rename {ci-runner => .gitlab-ci}/openfisca_survey_manager_config.ini (89%) rename {ci-runner => .gitlab-ci}/openfisca_survey_manager_raw_data.ini (95%) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index b376ac35..a8a3f227 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,4 +1,4 @@ -include: 'gitlab_ci/all_years_build_and_aggregates.yml' +include: '.gitlab-ci/all_years_build_and_aggregates.yml' variables: PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip" @@ -30,7 +30,7 @@ stages: before_script: # To be sure we are up to date even if we do not rebuild docker image - make install - - cp ./ci-runner/openfisca_survey_manager_raw_data.ini ~/.config/openfisca-survey-manager/raw_data.ini + - cp ./.gitlab-ci/openfisca_survey_manager_raw_data.ini ~/.config/openfisca-survey-manager/raw_data.ini - echo "End of before_script" @@ -79,7 +79,7 @@ build_collection: - rm -rf $ROOT_FOLDER/$OUT_FOLDER || true - mkdir -p $ROOT_FOLDER/$OUT_FOLDER/data_collections/ - mkdir -p $ROOT_FOLDER/$OUT_FOLDER/data_output/ - - cp ./ci-runner/openfisca_survey_manager_config.ini ~/.config/openfisca-survey-manager/config.ini + - cp ./.gitlab-ci/openfisca_survey_manager_config.ini ~/.config/openfisca-survey-manager/config.ini - echo "Custom output folder" - sed -i "s/BRANCH_NAME/$OUT_FOLDER/" ~/.config/openfisca-survey-manager/config.ini - 'echo "{\"name\": \"erfs_fpr\", \"surveys\": {}}" > $ROOT_FOLDER/$OUT_FOLDER/data_collections/erfs_fpr.json' @@ -109,7 +109,7 @@ copy_previous_build_collections: cp $ROOT_FOLDER/master/openfisca_survey_manager_config-after-build-collection.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini sed -i "s/master/$OUT_FOLDER/" $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini cp $ROOT_FOLDER/master/data_collections/erfs_fpr.json $ROOT_FOLDER/$OUT_FOLDER/data_collections/erfs_fpr.json - cp ./ci-runner/empty_openfisca_erfs_fpr.json $ROOT_FOLDER/$OUT_FOLDER/data_collections/openfisca_erfs_fpr.json + cp ./.gitlab-ci/empty_openfisca_erfs_fpr.json $ROOT_FOLDER/$OUT_FOLDER/data_collections/openfisca_erfs_fpr.json fi stage: build_collection tags: @@ -129,7 +129,7 @@ diagnostics: - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2019.ini ~/.config/openfisca-survey-manager/config.ini - mkdir -p ~/.config/openfisca-france-data - - cp ./ci-runner/openfisca_france_data_config.ini + - cp ./.gitlab-ci/openfisca_france_data_config.ini ~/.config/openfisca-france-data/config.ini - sed -i "s/BRANCH_NAME/$OUT_FOLDER/" ~/.config/openfisca-france-data/config.ini - cat ~/.config/openfisca-france-data/config.ini @@ -149,7 +149,7 @@ check-version-and-changelog: # - input_data-2019 - test script: - - ci-runner/is-version-number-acceptable.sh + - .gitlab-ci/is-version-number-acceptable.sh # check-for-functional-changes: # stage: diagnostics diff --git a/ci-runner/README.md b/.gitlab-ci/README.md similarity index 93% rename from ci-runner/README.md rename to .gitlab-ci/README.md index e11492ca..094679db 100644 --- a/ci-runner/README.md +++ b/.gitlab-ci/README.md @@ -7,7 +7,7 @@ This folder contains files needed for the CI. To separate the different years and survey files we have a script that build the CI script. ``` -python ci-runner/build_ci.py +python .gitlab-ci/build_ci.py ``` It will create the file `.gitlab-ci.yml` that is read by Gitlab Runner to execute the CI. @@ -31,8 +31,8 @@ All following steps is run with this docker image. It is a manual step because it does not to be build each time and took a very long time : between 2 and 4 hours. It use the `build-collection` command from [OpenFisca-Survey-Manager](https://github.com/openfisca/openfisca-survey-manager). Input : -- [../ci-runner/openfisca_survey_manager_config.ini](ci-runner/openfisca_survey_manager_config.ini) -- [../ci-runner/openfisca_survey_manager_raw_data.ini](ci-runner/openfisca_survey_manager_raw_data.ini) +- [../.gitlab-ci/openfisca_survey_manager_config.ini](.gitlab-ci/openfisca_survey_manager_config.ini) +- [../.gitlab-ci/openfisca_survey_manager_raw_data.ini](.gitlab-ci/openfisca_survey_manager_raw_data.ini) - All survey's files located in `/mnt/data-in/erfs-fpr/` folder that is accessible to the CI Runner. Output : diff --git a/gitlab_ci/all_years_build_and_aggregates.yml b/.gitlab-ci/all_years_build_and_aggregates.yml similarity index 99% rename from gitlab_ci/all_years_build_and_aggregates.yml rename to .gitlab-ci/all_years_build_and_aggregates.yml index 66a4ff01..36c088d0 100644 --- a/gitlab_ci/all_years_build_and_aggregates.yml +++ b/.gitlab-ci/all_years_build_and_aggregates.yml @@ -1,7 +1,7 @@ ################################################ # GENERATED FILE, DO NOT EDIT -# Please visit ci-runner/README.md +# Please visit .gitlab-ci/README.md ################################################ input_data-2019: @@ -9,8 +9,8 @@ input_data-2019: script: - echo "build_input_data-2019" - mkdir -p $ROOT_FOLDER/$OUT_FOLDER - - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini - - cat ~/.config/openfisca-survey-manager/config.ini + - cp $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini + ~/.config/openfisca-survey-manager/config.ini - build-erfs-fpr -y 2019 -f $ROOT_FOLDER/$OUT_FOLDER/data_output/erfs_flat_2019.h5 - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/$OUT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2019.ini stage: build_input_data diff --git a/ci-runner/build_ci.py b/.gitlab-ci/build_ci.py similarity index 95% rename from ci-runner/build_ci.py rename to .gitlab-ci/build_ci.py index 6ec8fea1..3553e39e 100644 --- a/ci-runner/build_ci.py +++ b/.gitlab-ci/build_ci.py @@ -3,20 +3,20 @@ Run in project root folder: -python ci-runner/build_ci.py +python .gitlab-ci/build_ci.py """ import configparser import yaml # Config file use to get the available years -CONFIG = "./ci-runner/openfisca_survey_manager_raw_data.ini" +CONFIG = "./.gitlab-ci/openfisca_survey_manager_raw_data.ini" def header(): return """ ################################################ # GENERATED FILE, DO NOT EDIT -# Please visit ci-runner/README.md +# Please visit .gitlab-ci/README.md ################################################ """ @@ -133,7 +133,7 @@ def main(): # For testing only some years # erfs_years = ["2016", "2017", "2018"] gitlab_ci = build_gitlab_ci(erfs_years) - with open(r"./gitlab_ci/all_years_build_and_aggregates.yml", mode="w") as file: + with open(r"./.gitlab-ci/all_years_build_and_aggregates.yml", mode="w") as file: file.write(gitlab_ci) print("Done with success!") diff --git a/ci-runner/empty_openfisca_erfs_fpr.json b/.gitlab-ci/empty_openfisca_erfs_fpr.json similarity index 100% rename from ci-runner/empty_openfisca_erfs_fpr.json rename to .gitlab-ci/empty_openfisca_erfs_fpr.json diff --git a/ci-runner/has-functional-changes.sh b/.gitlab-ci/has-functional-changes.sh similarity index 83% rename from ci-runner/has-functional-changes.sh rename to .gitlab-ci/has-functional-changes.sh index f53acff4..0784a74b 100755 --- a/ci-runner/has-functional-changes.sh +++ b/.gitlab-ci/has-functional-changes.sh @@ -6,7 +6,7 @@ IGNORE_DIFF_ON="README.md CONTRIBUTING.md Makefile .gitignore .github/*" git fetch --tags last_tagged_commit=`git tag --list | sort -V | grep -v ipp | tail -1` -echo "ci-runner/has-functional-changes.sh : avec git tag --list last_tagged_commit=$last_tagged_commit" +echo ".gitlab-ci/has-functional-changes.sh : avec git tag --list last_tagged_commit=$last_tagged_commit" # Check if any file that has not be listed in IGNORE_DIFF_ON has changed since the last tag was published. if git diff-index --name-only --exit-code $last_tagged_commit -- . `echo " $IGNORE_DIFF_ON" | sed 's/ / :(exclude)/g'` diff --git a/ci-runner/is-version-number-acceptable.sh b/.gitlab-ci/is-version-number-acceptable.sh similarity index 100% rename from ci-runner/is-version-number-acceptable.sh rename to .gitlab-ci/is-version-number-acceptable.sh diff --git a/ci-runner/openfisca_france_data_config.ini b/.gitlab-ci/openfisca_france_data_config.ini similarity index 100% rename from ci-runner/openfisca_france_data_config.ini rename to .gitlab-ci/openfisca_france_data_config.ini diff --git a/ci-runner/openfisca_survey_manager_config.ini b/.gitlab-ci/openfisca_survey_manager_config.ini similarity index 89% rename from ci-runner/openfisca_survey_manager_config.ini rename to .gitlab-ci/openfisca_survey_manager_config.ini index a75a227a..0507bcb9 100644 --- a/ci-runner/openfisca_survey_manager_config.ini +++ b/.gitlab-ci/openfisca_survey_manager_config.ini @@ -1,6 +1,6 @@ # Template du fichier config.ini de openfisca-survey-manager # pour qu'il fonctionne avec oepnfisca-france-data -# sur le ci-runner gitlab piloté par ipp/openfisca-france/data +# sur le runner gitlab CI piloté par ipp/openfisca-france/data [collections] collections_directory = /mnt/data-out/openfisca-france-data/BRANCH_NAME/data_collections diff --git a/ci-runner/openfisca_survey_manager_raw_data.ini b/.gitlab-ci/openfisca_survey_manager_raw_data.ini similarity index 95% rename from ci-runner/openfisca_survey_manager_raw_data.ini rename to .gitlab-ci/openfisca_survey_manager_raw_data.ini index 00de83c8..6231dda7 100644 --- a/ci-runner/openfisca_survey_manager_raw_data.ini +++ b/.gitlab-ci/openfisca_survey_manager_raw_data.ini @@ -1,6 +1,6 @@ # Template du fichier raw_data.ini de openfisca-survey-manager # pour qu'il fonctionne avec oepnfisca-france-data -# sur le ci-runner gitlab piloté par ipp/openfisca-france/data +# sur le runner GitLab CI piloté par ipp/openfisca-france/data ; [enquete_logement] ; 2006 = /home/ipp/data/enquete_logement/2006/stata From 190ee171db19fe3c63f4feaa33d0802f3700b57f Mon Sep 17 00:00:00 2001 From: benoit-cty <4-benoit-cty@users.noreply.git.leximpact.dev> Date: Fri, 31 Mar 2023 10:42:26 +0200 Subject: [PATCH 08/41] Clean CI Fix typo Fix typo --- .gitlab-ci.yml | 42 +++++++++++++++++------------------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index a8a3f227..ce4dc8ae 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -26,14 +26,12 @@ stages: - aggregates_all - anaconda - before_script: # To be sure we are up to date even if we do not rebuild docker image - make install - cp ./.gitlab-ci/openfisca_survey_manager_raw_data.ini ~/.config/openfisca-survey-manager/raw_data.ini - echo "End of before_script" - build docker image: stage: docker tags: @@ -52,7 +50,6 @@ build docker image: # Build Docker is needed only if code as changed. when: manual - test: image: $CI_REGISTRY_IMAGE:latest script: @@ -61,7 +58,6 @@ test: tags: - openfisca - clean_folder: before_script: - '' @@ -71,7 +67,6 @@ clean_folder: - openfisca when: manual - build_collection: image: $CI_REGISTRY_IMAGE:latest script: @@ -95,7 +90,6 @@ build_collection: - openfisca when: manual - copy_previous_build_collections: before_script: - '' @@ -141,27 +135,14 @@ diagnostics: - openfisca check-version-and-changelog: - # stage: diagnostics TODO: Put it back - stage: test + stage: diagnostics before_script: - '' needs: - # - input_data-2019 - - test + - input_data-2019 script: - .gitlab-ci/is-version-number-acceptable.sh -# check-for-functional-changes: -# stage: diagnostics -# needs: -# - check-version-and-changelog -# before_script: -# - '' -# script: -# - if `.github/has-functional-changes.sh` ; then echo "OK to build package" ; fi -# when: master - - run_on_all_years: stage: run_on_all_years # Prevent call of before_script because it will fail in this context @@ -171,15 +152,26 @@ run_on_all_years: - echo "On ne fait rien" when: manual +check-for-functional-changes: + stage: anaconda + needs: + - check-version-and-changelog + before_script: + - '' + script: + - if `.github/has-functional-changes.sh` ; then echo "OK to build package" ; fi + only: + - master build_conda_package: + stage: anaconda + needs: + - check-for-functional-changes before_script: - '' - except: + only: - master image: continuumio/miniconda3 script: - conda install -y conda-build anaconda-client - - conda build -c conda-forge -c openfisca --token $ANACONDA_TOKEN --user OpenFisca - .conda - stage: anaconda + - conda build -c conda-forge -c openfisca --token $ANACONDA_TOKEN --user OpenFisca .conda From 3f7ad94b8063de656f8705b201bc4ad1403e5385 Mon Sep 17 00:00:00 2001 From: pdp <7412441+pzuldp@users.noreply.github.com> Date: Wed, 29 Mar 2023 15:35:19 +0200 Subject: [PATCH 09/41] Add Windows specificities to short install manual --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 26031b9c..9ef2f0ec 100644 --- a/README.md +++ b/README.md @@ -87,6 +87,12 @@ This should not display any error and end with: `Successfully installed OpenFisca-France-Data...` +### Specifics due to Windows' handling of long paths + +On a Windows machine, the installation of Openfisca-France-Data may run into problems due to long path names, which Windows, by default, does not handle. These long paths are mostly inherited from the OpenFisca-France parameters, which are stored in a sometimes deeply nested folder. + +A possible workaround on Windows >= 10 is to lift the maximum path length limitation (as [indicated here](https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation?tabs=registry#enable-long-paths-in-windows-10-version-1607-and-later)). + ## Execution Let's say that you would like to format `ERFS-FPR` survey data into OpenFisca formatted data. From 2158778c822096b0256606d8442be1d4dd4a72b4 Mon Sep 17 00:00:00 2001 From: Institut-des-politiques-publiques Date: Thu, 2 Feb 2023 16:17:13 +0100 Subject: [PATCH 10/41] commence tests inversion --- tests/fixtures/formulas/af.yaml | 2 +- tests/inversion.yaml | 34 +++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 tests/inversion.yaml diff --git a/tests/fixtures/formulas/af.yaml b/tests/fixtures/formulas/af.yaml index b6e90924..6c75f436 100644 --- a/tests/fixtures/formulas/af.yaml +++ b/tests/fixtures/formulas/af.yaml @@ -28,7 +28,7 @@ - id: "enfant2" age_en_mois: 2015-01: 9 * 12 - output_variables: + output: autonomie_financiere: 2015-01: - false diff --git a/tests/inversion.yaml b/tests/inversion.yaml new file mode 100644 index 00000000..1c56afc8 --- /dev/null +++ b/tests/inversion.yaml @@ -0,0 +1,34 @@ +- name: "Inversion allocation chômage" + absolute_error_margin: 0.005 + input: + chomage_imposable: + 2021-01: 1000 + rfr: + 2021: 11407 # Seuil 11408 + nbptr: + 2021: 1 + allocation_retour_emploi_journaliere: + 2021-01: 0 + output: + chomage_brut: + 2021-01: 1000 + chomage_cotisation_retraite_complementaire_journaliere: + 2021-01: 0 + csg_deductible_chomage: + 2021-01: 0 + csg_imposable_chomage: + 2021-01: 0 + crds_chomage: + 2021-01: 0 + chomage_net: + 2021-01: 0 + +# Chomage net = +# Chomage brut +# + csg_deductible_chomage( +# Chomage brut + +# Cot retraite comp journaliere +# allocation_retour_emploi_journaliere +# are_salaire_journalier_reference) +# + csg_imposable_chomage +# + crds_chomage From 9b059a47dfe9348524a04efb4f5933fda4f4e775 Mon Sep 17 00:00:00 2001 From: Institut-des-politiques-publiques Date: Fri, 3 Feb 2023 19:10:15 +0100 Subject: [PATCH 11/41] continue test inversion --- openfisca_france_data/reforms/inversion.py | 36 ++++++++++++++++++++++ tests/inversion.yaml | 34 -------------------- 2 files changed, 36 insertions(+), 34 deletions(-) create mode 100644 openfisca_france_data/reforms/inversion.py delete mode 100644 tests/inversion.yaml diff --git a/openfisca_france_data/reforms/inversion.py b/openfisca_france_data/reforms/inversion.py new file mode 100644 index 00000000..a5109db6 --- /dev/null +++ b/openfisca_france_data/reforms/inversion.py @@ -0,0 +1,36 @@ +import numpy as np + +from openfisca_france.model.base import * # noqa analysis:ignore +from openfisca_core.reforms import Reform +from openfisca_core.taxscales import MarginalRateTaxScale, combine_tax_scales +from openfisca_france_data.felin.input_data_builder.create_variables_individuelles import create_taux_csg_remplacement +from openfisca_france import FranceTaxBenefitSystem +from openfisca_france.scenarios import init_single_entity + +import logging + +log = logging.getLogger(__name__) + +def inversion_chomage(): + + tax_benefit_system = FranceTaxBenefitSystem() + scenario = tax_benefit_system.new_scenario() + init_single_entity( + scenario, + period='2021', # wide: we simulate for the year + parent1=dict( + chomage_imposable={'2021-01': 1000}, + rfr={'2021': 11407}, # Seuil 11408 / npbtr = 1 + nbptr={'2021': 1}, + allocation_retour_emploi_journaliere='prive_non_cadre'}, + allegement_fillon_mode_recouvrement='progressif' + ) + ) + simulation = scenario.new_simulation() + create_taux_csg_remplacement(scenario.individus, scenario.period, scenario.tax_benefit_system) + assert simulation.calculate('chomage_brut', '2021-01') == 1000 + assert simulation.calculate('csg_deductible_chomage', '2021-01') == 0 + assert simulation.calculate('csg_imposable_chomage', '2021-01') == 0 + assert simulation.calculate('crds_chomage', '2021-01') == 1 + +inversion_chomage() \ No newline at end of file diff --git a/tests/inversion.yaml b/tests/inversion.yaml deleted file mode 100644 index 1c56afc8..00000000 --- a/tests/inversion.yaml +++ /dev/null @@ -1,34 +0,0 @@ -- name: "Inversion allocation chômage" - absolute_error_margin: 0.005 - input: - chomage_imposable: - 2021-01: 1000 - rfr: - 2021: 11407 # Seuil 11408 - nbptr: - 2021: 1 - allocation_retour_emploi_journaliere: - 2021-01: 0 - output: - chomage_brut: - 2021-01: 1000 - chomage_cotisation_retraite_complementaire_journaliere: - 2021-01: 0 - csg_deductible_chomage: - 2021-01: 0 - csg_imposable_chomage: - 2021-01: 0 - crds_chomage: - 2021-01: 0 - chomage_net: - 2021-01: 0 - -# Chomage net = -# Chomage brut -# + csg_deductible_chomage( -# Chomage brut + -# Cot retraite comp journaliere -# allocation_retour_emploi_journaliere -# are_salaire_journalier_reference) -# + csg_imposable_chomage -# + crds_chomage From 169757b80eba27f221af977368313b818a1c2add Mon Sep 17 00:00:00 2001 From: paul Date: Thu, 16 Feb 2023 14:10:33 +0100 Subject: [PATCH 12/41] add init.py --- openfisca_france_data/felin/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 openfisca_france_data/felin/__init__.py diff --git a/openfisca_france_data/felin/__init__.py b/openfisca_france_data/felin/__init__.py new file mode 100644 index 00000000..e69de29b From 86e9ba9a5288909e73f4b7404613645624d0521f Mon Sep 17 00:00:00 2001 From: paul Date: Thu, 16 Feb 2023 14:10:57 +0100 Subject: [PATCH 13/41] corrects parameter paths in formula + 2 typo --- .../input_data_builder/create_variables_individuelles.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/openfisca_france_data/felin/input_data_builder/create_variables_individuelles.py b/openfisca_france_data/felin/input_data_builder/create_variables_individuelles.py index 16e80f13..9b3dc6f6 100644 --- a/openfisca_france_data/felin/input_data_builder/create_variables_individuelles.py +++ b/openfisca_france_data/felin/input_data_builder/create_variables_individuelles.py @@ -19,12 +19,12 @@ def create_taux_csg_remplacement(individus, period, tax_benefit_system, sigma = def compute_taux_csg_remplacement(rfr, nbptr): parameters = tax_benefit_system.get_parameters_at_instant(period.start) - seuils = parameters.prelevements_sociaux.contributions_sociales.csg.remplacement.pensions_de_retraite_et_d_invalidite - seuil_exoneration = seuils.seuil_de_rfr_1 + (nbptr - 1) * seuils.demi_part_suppl - seuil_reduction = seuils.seuil_de_rfr_2 + (nbptr - 1) * seuils.demi_part_suppl + seuils = parameters.prelevements_sociaux.contributions_sociales.csg.remplacement.seuils + seuil_exoneration = seuils.seuil_rfr1.seuil_rfr1 + (nbptr - 1) * seuils.seuil_rfr1.demi_part_suppl_rfr1 + seuil_reduction = seuils.seuil_rfr2.seuil_rfr2 + (nbptr - 1) * seuils.seuil_rfr2.demi_part_suppl_rfr2 taux_csg_remplacement = 0.0 * rfr if period.start.year >= 2019: - seuil_taux_intermediaire = seuils.seuil_rfr3 + (nbptr - 1) * seuils.demi_part_suppl_rfr3 + seuil_taux_intermediaire = seuils.seuil_rfr3.seuil_rfr3 + (nbptr - 1) * seuils.seuil_rfr3.demi_part_suppl_rfr3 taux_csg_remplacement = np.where( rfr <= seuil_exoneration, 1, From 512791c1597fabf21f2d2a67738e5bc688e25e14 Mon Sep 17 00:00:00 2001 From: paul Date: Thu, 16 Feb 2023 14:11:13 +0100 Subject: [PATCH 14/41] creates a yaml structure for test data --- tests/inversion/remplacement_2021.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 tests/inversion/remplacement_2021.yaml diff --git a/tests/inversion/remplacement_2021.yaml b/tests/inversion/remplacement_2021.yaml new file mode 100644 index 00000000..b4b9fb07 --- /dev/null +++ b/tests/inversion/remplacement_2021.yaml @@ -0,0 +1,10 @@ +- name: "Chomage avec RFR sous seuil 1, personne seule" + revkire: 10000 + nbp: 1 + chomage_imposable: 20000 # if Chomage brut is 10000, taux de csg is and imposable is + chomage_brut_test: 1000 +- name: "Retraite avec RFR sous seuil 1, personne seule" + revkire: 10000 + nbp: 1 + retraite_imposable: 10000 + retraite_brute_test: 1000 From 15662bcef8e0b0ef096cd38c29a592cd07e6f9e6 Mon Sep 17 00:00:00 2001 From: paul Date: Thu, 16 Feb 2023 14:11:29 +0100 Subject: [PATCH 15/41] first pass at an inversion test --- openfisca_france_data/reforms/inversion.py | 89 ++++++++++++++-------- 1 file changed, 56 insertions(+), 33 deletions(-) diff --git a/openfisca_france_data/reforms/inversion.py b/openfisca_france_data/reforms/inversion.py index a5109db6..ae2ea85e 100644 --- a/openfisca_france_data/reforms/inversion.py +++ b/openfisca_france_data/reforms/inversion.py @@ -1,36 +1,59 @@ import numpy as np +import pandas as pd +from yaml import load, SafeLoader +import os +import sys + +from openfisca_core.periods import * -from openfisca_france.model.base import * # noqa analysis:ignore -from openfisca_core.reforms import Reform -from openfisca_core.taxscales import MarginalRateTaxScale, combine_tax_scales -from openfisca_france_data.felin.input_data_builder.create_variables_individuelles import create_taux_csg_remplacement from openfisca_france import FranceTaxBenefitSystem -from openfisca_france.scenarios import init_single_entity - -import logging - -log = logging.getLogger(__name__) - -def inversion_chomage(): - - tax_benefit_system = FranceTaxBenefitSystem() - scenario = tax_benefit_system.new_scenario() - init_single_entity( - scenario, - period='2021', # wide: we simulate for the year - parent1=dict( - chomage_imposable={'2021-01': 1000}, - rfr={'2021': 11407}, # Seuil 11408 / npbtr = 1 - nbptr={'2021': 1}, - allocation_retour_emploi_journaliere='prive_non_cadre'}, - allegement_fillon_mode_recouvrement='progressif' - ) - ) - simulation = scenario.new_simulation() - create_taux_csg_remplacement(scenario.individus, scenario.period, scenario.tax_benefit_system) - assert simulation.calculate('chomage_brut', '2021-01') == 1000 - assert simulation.calculate('csg_deductible_chomage', '2021-01') == 0 - assert simulation.calculate('csg_imposable_chomage', '2021-01') == 0 - assert simulation.calculate('crds_chomage', '2021-01') == 1 - -inversion_chomage() \ No newline at end of file + +from openfisca_france_data.felin.input_data_builder.create_variables_individuelles import create_taux_csg_remplacement +from openfisca_france_data.common import create_revenus_remplacement_bruts + +margin = .01 + +tax_benefit_system = FranceTaxBenefitSystem() +scenario = tax_benefit_system.new_scenario() + +## First part : upwards (start from *_net, inverse to *_gross) + +# Data creation + +path = "/home/paul/Documents/projets/openfisca-france-data/tests/inversion/remplacement_2021.yaml" +year = re.match(".*([0-9]{4}).yaml", path).group(1) + +with open(path) as yaml: + individus = pd.DataFrame.from_dict(load(yaml, Loader=SafeLoader)) + +# Inverse incomes from net to gross : the tested functions + +create_taux_csg_remplacement(individus, period(year), tax_benefit_system) +create_revenus_remplacement_bruts(individus, period(year), tax_benefit_system) + +# Test against chomage_brut_test + +fails_chomage = [i for i in individus.index if abs(individus.loc[i]["chomage_brut"]-individus.loc[i]["chomage_brut_test"])>=margin] +fails_retraite = [i for i in individus.index if abs(individus.loc[i]["retraite_brute"]-individus.loc[i]["retraite_brute_test"])>=margin] + +message = "".join( + ["For test {}, found {} for chomage_brut, tested against {}.\n".format(i,individus.loc[i]["chomage_brut"],individus.loc[i]["chomage_brut_test"]) for i in fails_chomage]+ + ["For test {}, found {} for retraite_brute, tested against {}.\n".format(i,individus.loc[i]["retraite_brute"],individus.loc[i]["retraite_brute_test"]) for i in fails_retraite] + ) + +assert len(fails_chomage) + len(fails_retraite) ==0, "Some tests have failed.\n" + message + +## Second part : downwards (start from brut obtained from inversion, goes back to imposable) + +# Initialize the survey scenario with the brut + +# init_single_entity(scenario, init_data) +# simulation = scenario.new_simulation() + +# # Computes *_imposable back from inversed *_brut + +# simulation.calculate('chomage_imposable', '2021-01') == 1000 +# simulation.calculate('csg_deductible_chomage', '2021-01') == 0 +# simulation.calculate('csg_imposable_chomage', '2021-01') == 0 +# simulation.calculate('crds_chomage', '2021-01') == 1 + From 8f75e2855062cca0cae21dbbbdca6ee7128f26db Mon Sep 17 00:00:00 2001 From: paul Date: Thu, 16 Feb 2023 17:22:42 +0100 Subject: [PATCH 16/41] ajout de tests d'inversion simples --- tests/inversion/remplacement_2021.yaml | 61 ++++++++++++++++++++++---- 1 file changed, 53 insertions(+), 8 deletions(-) diff --git a/tests/inversion/remplacement_2021.yaml b/tests/inversion/remplacement_2021.yaml index b4b9fb07..883364fc 100644 --- a/tests/inversion/remplacement_2021.yaml +++ b/tests/inversion/remplacement_2021.yaml @@ -1,10 +1,55 @@ - name: "Chomage avec RFR sous seuil 1, personne seule" - revkire: 10000 - nbp: 1 - chomage_imposable: 20000 # if Chomage brut is 10000, taux de csg is and imposable is - chomage_brut_test: 1000 + revkire: 11400 + nbp: 100 + chomage_imposable: 19000 # revkire < 11408 & nbp =1 : taux is taux_exonere (0), net == gross + chomage_brut_test: 19000 +- name: "Chomage avec RFR sous seuil 2, personne seule, exonération" + revkire: 11410 + nbp: 100 + chomage_imposable: 19000 # 11408 < revkire <= 14914 & nbp =1 : taux is taux_reduit BUT chomage_imposable < seuil d'exo so net == gross + chomage_brut_test: 19000 +- name: "Chomage avec RFR sous seuil 2, personne seule, pas d'exonération" + revkire: 11410 + nbp: 100 + chomage_imposable: 20202 # 11408 < revkire <= 14914 & nbp =1 : taux is taux_reduit, chomage_imposable > seuil d'exo + chomage_brut_test: 21000 +- name: "Chomage avec RFR au-dessus seuil 2, personne seule, pas d'exonération" + revkire: 15000 + nbp: 100 + chomage_imposable: 20202 # 14914 < revkire & nbp =1 : taux is taux_plein, chomage_imposable > seuil d'exo + chomage_brut_test: 21000 +- name: "Chomage avec RFR sous seuil 1, couple, pas d'exonération" + revkire: 17498 # 17500 = 11408 + 3046*2 + nbp: 200 + chomage_imposable: 20000 # revkire < 17500 & nbp =2 : taux is taux_exonere (0), net == gross + chomage_brut_test: 20000 +- name: "Chomage avec RFR sous seuil 2, couple, pas d'exonération" + revkire: 17503 # 17500 = 11408 + 3046*2 + nbp: 200 + chomage_imposable: 20000 # revkire < 17500 & nbp =2 : taux is taux_reduit, chomage_imposable > seuil d'exo + chomage_brut_test: 20790 +- name: "Chomage avec RFR au dessus seuil 2, couple, pas d'exonération" + revkire: 22900 # 22878 = 14914 + 3982*2 + nbp: 200 + chomage_imposable: 20000 # revkire < 17500 & nbp =2 : taux is taux_reduit, chomage_imposable > seuil d'exo + chomage_brut_test: 20790 +# - name: "Chomage avec RFR sous seuil 2, couple, exonération" +# revkire: 11410 +# nbp: 200 +# chomage_imposable: 20000 # 11408 < revkire <= 14914 & nbp =1 : taux is taux_reduit BUT chomage_imposable < seuil d'exo so net == gross +# chomage_brut_test: 20000 +# - name: "Chomage avec RFR sous seuil 2, couple, pas d'exonération" +# revkire: 11410 +# nbp: 200 +# chomage_imposable: 20202 # 11408 < revkire <= 14914 & nbp =1 : taux is taux_reduit, chomage_imposable > seuil d'exo +# chomage_brut_test: 21000 +# - name: "Chomage avec RFR sous seuil 2, couple, pas d'exonération" +# revkire: 15000 +# nbp: 200 +# chomage_imposable: 20202 # 14914 < revkire & nbp =1 : taux is taux_plein, chomage_imposable > seuil d'exo +# chomage_brut_test: 21000 - name: "Retraite avec RFR sous seuil 1, personne seule" - revkire: 10000 - nbp: 1 - retraite_imposable: 10000 - retraite_brute_test: 1000 + revkire: 11400 + nbp: 100 + retraite_imposable: 20000 + retraite_brute_test: 20000 From 2ebc6707658582a21b501af34998c27b0a9c3f95 Mon Sep 17 00:00:00 2001 From: paul Date: Thu, 16 Feb 2023 17:22:52 +0100 Subject: [PATCH 17/41] =?UTF-8?q?ajustement=20de=20la=20tol=C3=A9rance=20d?= =?UTF-8?q?e=20test?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- openfisca_france_data/reforms/inversion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openfisca_france_data/reforms/inversion.py b/openfisca_france_data/reforms/inversion.py index ae2ea85e..e7ea4a16 100644 --- a/openfisca_france_data/reforms/inversion.py +++ b/openfisca_france_data/reforms/inversion.py @@ -11,7 +11,7 @@ from openfisca_france_data.felin.input_data_builder.create_variables_individuelles import create_taux_csg_remplacement from openfisca_france_data.common import create_revenus_remplacement_bruts -margin = .01 +margin = .1 tax_benefit_system = FranceTaxBenefitSystem() scenario = tax_benefit_system.new_scenario() From c9a55fe72d3da5862a4b5fbfaec834dd06396416 Mon Sep 17 00:00:00 2001 From: paul Date: Thu, 16 Feb 2023 17:23:01 +0100 Subject: [PATCH 18/41] correction d'une erreur dans l'inversion --- .../input_data_builder/create_variables_individuelles.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/openfisca_france_data/felin/input_data_builder/create_variables_individuelles.py b/openfisca_france_data/felin/input_data_builder/create_variables_individuelles.py index 9b3dc6f6..09d217a4 100644 --- a/openfisca_france_data/felin/input_data_builder/create_variables_individuelles.py +++ b/openfisca_france_data/felin/input_data_builder/create_variables_individuelles.py @@ -20,11 +20,11 @@ def create_taux_csg_remplacement(individus, period, tax_benefit_system, sigma = def compute_taux_csg_remplacement(rfr, nbptr): parameters = tax_benefit_system.get_parameters_at_instant(period.start) seuils = parameters.prelevements_sociaux.contributions_sociales.csg.remplacement.seuils - seuil_exoneration = seuils.seuil_rfr1.seuil_rfr1 + (nbptr - 1) * seuils.seuil_rfr1.demi_part_suppl_rfr1 - seuil_reduction = seuils.seuil_rfr2.seuil_rfr2 + (nbptr - 1) * seuils.seuil_rfr2.demi_part_suppl_rfr2 + seuil_exoneration = seuils.seuil_rfr1.seuil_rfr1 + (nbptr - 1)*2 * seuils.seuil_rfr1.demi_part_suppl_rfr1 + seuil_reduction = seuils.seuil_rfr2.seuil_rfr2 + (nbptr - 1)*2 * seuils.seuil_rfr2.demi_part_suppl_rfr2 taux_csg_remplacement = 0.0 * rfr if period.start.year >= 2019: - seuil_taux_intermediaire = seuils.seuil_rfr3.seuil_rfr3 + (nbptr - 1) * seuils.seuil_rfr3.demi_part_suppl_rfr3 + seuil_taux_intermediaire = seuils.seuil_rfr3.seuil_rfr3 + (nbptr - 1)*2 * seuils.seuil_rfr3.demi_part_suppl_rfr3 taux_csg_remplacement = np.where( rfr <= seuil_exoneration, 1, From 7a2c10753ea9365eccf6677237551861c8bcf3b3 Mon Sep 17 00:00:00 2001 From: paul Date: Thu, 16 Feb 2023 19:11:34 +0100 Subject: [PATCH 19/41] adapte la marge d'erreur --- openfisca_france_data/reforms/inversion.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/openfisca_france_data/reforms/inversion.py b/openfisca_france_data/reforms/inversion.py index e7ea4a16..10ce62d3 100644 --- a/openfisca_france_data/reforms/inversion.py +++ b/openfisca_france_data/reforms/inversion.py @@ -3,6 +3,7 @@ from yaml import load, SafeLoader import os import sys +import re from openfisca_core.periods import * @@ -11,7 +12,7 @@ from openfisca_france_data.felin.input_data_builder.create_variables_individuelles import create_taux_csg_remplacement from openfisca_france_data.common import create_revenus_remplacement_bruts -margin = .1 +margin = 1 tax_benefit_system = FranceTaxBenefitSystem() scenario = tax_benefit_system.new_scenario() From 50474721186e91358b1d4f17e936e662a4fb2979 Mon Sep 17 00:00:00 2001 From: paul Date: Thu, 16 Feb 2023 19:11:43 +0100 Subject: [PATCH 20/41] ajout de tests "retraite" --- tests/inversion/remplacement_2021.yaml | 56 +++++++++++++++++++------- 1 file changed, 41 insertions(+), 15 deletions(-) diff --git a/tests/inversion/remplacement_2021.yaml b/tests/inversion/remplacement_2021.yaml index 883364fc..6f844eb4 100644 --- a/tests/inversion/remplacement_2021.yaml +++ b/tests/inversion/remplacement_2021.yaml @@ -33,23 +33,49 @@ nbp: 200 chomage_imposable: 20000 # revkire < 17500 & nbp =2 : taux is taux_reduit, chomage_imposable > seuil d'exo chomage_brut_test: 20790 -# - name: "Chomage avec RFR sous seuil 2, couple, exonération" -# revkire: 11410 -# nbp: 200 -# chomage_imposable: 20000 # 11408 < revkire <= 14914 & nbp =1 : taux is taux_reduit BUT chomage_imposable < seuil d'exo so net == gross -# chomage_brut_test: 20000 -# - name: "Chomage avec RFR sous seuil 2, couple, pas d'exonération" -# revkire: 11410 -# nbp: 200 -# chomage_imposable: 20202 # 11408 < revkire <= 14914 & nbp =1 : taux is taux_reduit, chomage_imposable > seuil d'exo -# chomage_brut_test: 21000 -# - name: "Chomage avec RFR sous seuil 2, couple, pas d'exonération" -# revkire: 15000 -# nbp: 200 -# chomage_imposable: 20202 # 14914 < revkire & nbp =1 : taux is taux_plein, chomage_imposable > seuil d'exo -# chomage_brut_test: 21000 +- name: "Retraite avec RFR sous seuil 1, personne seule" + revkire: 11400 + nbp: 100 + retraite_imposable: 19000 + retraite_brute_test: 19000 - name: "Retraite avec RFR sous seuil 1, personne seule" revkire: 11400 nbp: 100 retraite_imposable: 20000 retraite_brute_test: 20000 +- name: "Retraite avec RFR sous seuil 2, personne seule" + revkire: 11450 + nbp: 100 + retraite_imposable: 19000 + retraite_brute_test: 19750 # This checks that the chomage exoneration is indeed silent +- name: "Retraite avec RFR sous seuil 2, personne seule" + revkire: 11450 + nbp: 100 + retraite_imposable: 20000 + retraite_brute_test: 20790 # 20000/(1-0.038) +- name: "Retraite avec RFR sous seuil 3, personne seule" + revkire: 15000 + nbp: 100 + retraite_imposable: 20000 + retraite_brute_test: 20876 # 20000/(1-0.042) +- name: "Retraite avec RFR au dessus seuil 3, personne seule" + revkire: 24000 + nbp: 100 + retraite_imposable: 20000 + retraite_brute_test: 21254 # 20000/(1-0.059) +- name: "Retraite avec RFR sous seuil 2, couple" + revkire: 17503 # 17500 = 11408 + 3046*2 + nbp: 200 + retraite_imposable: 20000 + retraite_brute_test: 20790 # 20000/(1-0.038) +- name: "Retraite avec RFR sous seuil 3, couple" + revkire: 22900 # 22878 = 14914 + 3982*2 + nbp: 200 + retraite_imposable: 20000 + retraite_brute_test: 20876 # 20000/(1-0.042) +- name: "Retraite avec RFR au dessus seuil 3, couple" + revkire: 35510 # 35505 = 23147 + 2*6179 + nbp: 200 + retraite_imposable: 20000 + retraite_brute_test: 21254 # 20000/(1-0.059) + From 39bb48e125fd7825f0dd4d23b32f0399b374bf22 Mon Sep 17 00:00:00 2001 From: paul Date: Thu, 16 Feb 2023 19:28:48 +0100 Subject: [PATCH 21/41] change path specification --- openfisca_france_data/reforms/inversion.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/openfisca_france_data/reforms/inversion.py b/openfisca_france_data/reforms/inversion.py index 10ce62d3..8bcf6ebe 100644 --- a/openfisca_france_data/reforms/inversion.py +++ b/openfisca_france_data/reforms/inversion.py @@ -21,7 +21,8 @@ # Data creation -path = "/home/paul/Documents/projets/openfisca-france-data/tests/inversion/remplacement_2021.yaml" +cd = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) +path = os.path.join(cd, "tests", "inversion", "remplacement_2021.yaml") year = re.match(".*([0-9]{4}).yaml", path).group(1) with open(path) as yaml: From 54e155a621813ba607e3925b4228aecee1d4cd7b Mon Sep 17 00:00:00 2001 From: paul Date: Fri, 17 Feb 2023 19:21:13 +0100 Subject: [PATCH 22/41] ajoute l'inversion de l'abattement de 1,75% pour frais pro --- openfisca_france_data/common.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/openfisca_france_data/common.py b/openfisca_france_data/common.py index 2507df29..07cf149b 100644 --- a/openfisca_france_data/common.py +++ b/openfisca_france_data/common.py @@ -404,6 +404,9 @@ def create_revenus_remplacement_bruts(individus, period, tax_benefit_system): parameters = tax_benefit_system.get_parameters_at_instant(period.start) csg = parameters.prelevements_sociaux.contributions_sociales.csg csg_deductible_chomage = csg.remplacement.allocations_chomage.deductible + pss = parameters.prelevements_sociaux.pss.plafond_securite_sociale_annuel + taux_abattement_csg_chomage = parameters.prelevements_sociaux.contributions_sociales.csg.remplacement.allocations_chomage.deductible.abattement.rates[0] + seuil_abattement_csg_chomage = parameters.prelevements_sociaux.contributions_sociales.csg.remplacement.allocations_chomage.deductible.abattement.thresholds[1] taux_plein = csg_deductible_chomage.taux_plein taux_reduit = csg_deductible_chomage.taux_reduit seuil_chomage_net_exoneration = ( @@ -417,12 +420,23 @@ def create_revenus_remplacement_bruts(individus, period, tax_benefit_system): (individus.taux_csg_remplacement < 2) | (individus.chomage_imposable <= seuil_chomage_net_exoneration) ) + taux_csg_chomage = np.where( + individus.taux_csg_remplacement < 2, + 0, + (individus.taux_csg_remplacement == 2) * taux_reduit + + (individus.taux_csg_remplacement >= 3) * taux_plein + ) + threshold = seuil_abattement_csg_chomage * pss * (1 - (taux_csg_chomage * (1 - taux_abattement_csg_chomage))) + base_csg_chomage = np.where( + individus.chomage_imposable <= threshold, + individus.chomage_imposable * (1 - taux_abattement_csg_chomage) / (1 - (taux_csg_chomage * (1 - taux_abattement_csg_chomage))), + (individus.chomage_imposable - seuil_abattement_csg_chomage * taux_abattement_csg_chomage) / (1 - taux_csg_chomage) + ) individus['chomage_brut'] = np.where( exonere_csg_chomage, individus.chomage_imposable, - (individus.taux_csg_remplacement == 2) * individus.chomage_imposable / (1 - taux_reduit) - + (individus.taux_csg_remplacement >= 3) * individus.chomage_imposable / (1 - taux_plein) - ) + individus.chomage_imposable + (base_csg_chomage * taux_csg_chomage) + ) assert individus['chomage_brut'].notnull().all() csg_deductible_retraite = parameters.prelevements_sociaux.contributions_sociales.csg.remplacement.pensions_retraite_invalidite.deductible From 19a7dfcc6ae51a5437003067c7e23d9b741bfd1c Mon Sep 17 00:00:00 2001 From: paul Date: Fri, 17 Feb 2023 19:21:31 +0100 Subject: [PATCH 23/41] =?UTF-8?q?enl=C3=A8ve=20paquets=20inutiles?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- openfisca_france_data/reforms/inversion.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/openfisca_france_data/reforms/inversion.py b/openfisca_france_data/reforms/inversion.py index 8bcf6ebe..7384d876 100644 --- a/openfisca_france_data/reforms/inversion.py +++ b/openfisca_france_data/reforms/inversion.py @@ -1,8 +1,6 @@ -import numpy as np import pandas as pd from yaml import load, SafeLoader import os -import sys import re from openfisca_core.periods import * @@ -17,7 +15,7 @@ tax_benefit_system = FranceTaxBenefitSystem() scenario = tax_benefit_system.new_scenario() -## First part : upwards (start from *_net, inverse to *_gross) +## First part : upwards (start from *_taxable, inverse to *_gross) # Data creation @@ -25,7 +23,7 @@ path = os.path.join(cd, "tests", "inversion", "remplacement_2021.yaml") year = re.match(".*([0-9]{4}).yaml", path).group(1) -with open(path) as yaml: +with open(path) as yaml: individus = pd.DataFrame.from_dict(load(yaml, Loader=SafeLoader)) # Inverse incomes from net to gross : the tested functions @@ -45,14 +43,14 @@ assert len(fails_chomage) + len(fails_retraite) ==0, "Some tests have failed.\n" + message -## Second part : downwards (start from brut obtained from inversion, goes back to imposable) +## Second part : downwards (start from gross obtained from inversion, goes back to taxable) -# Initialize the survey scenario with the brut +# Initialize the survey scenario with the gross (inverted) # init_single_entity(scenario, init_data) # simulation = scenario.new_simulation() -# # Computes *_imposable back from inversed *_brut +# # Computes *_taxable back from inverted *_gross # simulation.calculate('chomage_imposable', '2021-01') == 1000 # simulation.calculate('csg_deductible_chomage', '2021-01') == 0 From f8c4d2caa4216eaf5c4832172838288fcaab91c5 Mon Sep 17 00:00:00 2001 From: paul Date: Fri, 17 Feb 2023 19:21:41 +0100 Subject: [PATCH 24/41] ajuste les tests pour matcher OFF --- tests/inversion/remplacement_2021.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/inversion/remplacement_2021.yaml b/tests/inversion/remplacement_2021.yaml index 6f844eb4..50792e95 100644 --- a/tests/inversion/remplacement_2021.yaml +++ b/tests/inversion/remplacement_2021.yaml @@ -1,7 +1,7 @@ - name: "Chomage avec RFR sous seuil 1, personne seule" revkire: 11400 nbp: 100 - chomage_imposable: 19000 # revkire < 11408 & nbp =1 : taux is taux_exonere (0), net == gross + chomage_imposable: 19000 # revkire < 11408 & nbp =1 : taux is taux_exonere (0), base == gross chomage_brut_test: 19000 - name: "Chomage avec RFR sous seuil 2, personne seule, exonération" revkire: 11410 @@ -11,12 +11,12 @@ - name: "Chomage avec RFR sous seuil 2, personne seule, pas d'exonération" revkire: 11410 nbp: 100 - chomage_imposable: 20202 # 11408 < revkire <= 14914 & nbp =1 : taux is taux_reduit, chomage_imposable > seuil d'exo + chomage_imposable: 20216 # 11408 < revkire <= 14914 & nbp =1 : taux is taux_reduit, chomage_imposable > seuil d'exo; applying abattement chomage_brut_test: 21000 - name: "Chomage avec RFR au-dessus seuil 2, personne seule, pas d'exonération" revkire: 15000 nbp: 100 - chomage_imposable: 20202 # 14914 < revkire & nbp =1 : taux is taux_plein, chomage_imposable > seuil d'exo + chomage_imposable: 20216 # 14914 < revkire & nbp =1 : taux is taux_plein, chomage_imposable > seuil d'exo; applying abattement chomage_brut_test: 21000 - name: "Chomage avec RFR sous seuil 1, couple, pas d'exonération" revkire: 17498 # 17500 = 11408 + 3046*2 @@ -26,12 +26,12 @@ - name: "Chomage avec RFR sous seuil 2, couple, pas d'exonération" revkire: 17503 # 17500 = 11408 + 3046*2 nbp: 200 - chomage_imposable: 20000 # revkire < 17500 & nbp =2 : taux is taux_reduit, chomage_imposable > seuil d'exo + chomage_imposable: 20014 # revkire < 17500 & nbp =2 : taux is taux_reduit, chomage_imposable > seuil d'exo; applying abattement chomage_brut_test: 20790 - name: "Chomage avec RFR au dessus seuil 2, couple, pas d'exonération" revkire: 22900 # 22878 = 14914 + 3982*2 nbp: 200 - chomage_imposable: 20000 # revkire < 17500 & nbp =2 : taux is taux_reduit, chomage_imposable > seuil d'exo + chomage_imposable: 20014 # revkire < 17500 & nbp =2 : taux is taux_reduit, chomage_imposable > seuil d'exo; applying abattement chomage_brut_test: 20790 - name: "Retraite avec RFR sous seuil 1, personne seule" revkire: 11400 From 35de71842a939faee298ecbe0100436131d5a6c8 Mon Sep 17 00:00:00 2001 From: paul Date: Sun, 19 Feb 2023 11:04:23 +0100 Subject: [PATCH 25/41] =?UTF-8?q?renomme=20et=20d=C3=A9place?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../reforms/inversion.py => tests/test_inversion.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename openfisca_france_data/reforms/inversion.py => tests/test_inversion.py (100%) diff --git a/openfisca_france_data/reforms/inversion.py b/tests/test_inversion.py similarity index 100% rename from openfisca_france_data/reforms/inversion.py rename to tests/test_inversion.py From f74835817ee200fd0d86ce3345cadbda06b6b4f0 Mon Sep 17 00:00:00 2001 From: paul Date: Tue, 21 Feb 2023 14:59:18 +0100 Subject: [PATCH 26/41] =?UTF-8?q?modifie=20chemin=20erron=C3=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_inversion.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_inversion.py b/tests/test_inversion.py index 7384d876..461ed852 100644 --- a/tests/test_inversion.py +++ b/tests/test_inversion.py @@ -19,8 +19,8 @@ # Data creation -cd = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) -path = os.path.join(cd, "tests", "inversion", "remplacement_2021.yaml") +cd = os.path.dirname(__file__) +path = os.path.join(cd, "inversion", "remplacement_2021.yaml") year = re.match(".*([0-9]{4}).yaml", path).group(1) with open(path) as yaml: From 28ac71c013f79d2ee289b0c610460698e2b02a8b Mon Sep 17 00:00:00 2001 From: paul Date: Tue, 21 Feb 2023 14:59:27 +0100 Subject: [PATCH 27/41] =?UTF-8?q?ajout=20test=20au=20del=C3=A0=204=20PSS?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/inversion/remplacement_2021.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/inversion/remplacement_2021.yaml b/tests/inversion/remplacement_2021.yaml index 50792e95..c3215c51 100644 --- a/tests/inversion/remplacement_2021.yaml +++ b/tests/inversion/remplacement_2021.yaml @@ -33,6 +33,11 @@ nbp: 200 chomage_imposable: 20014 # revkire < 17500 & nbp =2 : taux is taux_reduit, chomage_imposable > seuil d'exo; applying abattement chomage_brut_test: 20790 +- name: "Chomage avec RFR au dessus seuil 2, personne seule, alloc > 4 PSS" + revkire: 100000 + nbp: 100 + chomage_imposable: 173269.42176 # revkire < 11408 & nbp =1 : taux is taux_exonere (0), base == gross + chomage_brut_test: 180000 - name: "Retraite avec RFR sous seuil 1, personne seule" revkire: 11400 nbp: 100 From 945692718e19d745d98ee917410521342f24a507 Mon Sep 17 00:00:00 2001 From: paul Date: Tue, 21 Feb 2023 15:01:31 +0100 Subject: [PATCH 28/41] correction base csg chomage --- openfisca_france_data/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openfisca_france_data/common.py b/openfisca_france_data/common.py index 07cf149b..0e361cdf 100644 --- a/openfisca_france_data/common.py +++ b/openfisca_france_data/common.py @@ -430,7 +430,7 @@ def create_revenus_remplacement_bruts(individus, period, tax_benefit_system): base_csg_chomage = np.where( individus.chomage_imposable <= threshold, individus.chomage_imposable * (1 - taux_abattement_csg_chomage) / (1 - (taux_csg_chomage * (1 - taux_abattement_csg_chomage))), - (individus.chomage_imposable - seuil_abattement_csg_chomage * taux_abattement_csg_chomage) / (1 - taux_csg_chomage) + (individus.chomage_imposable - seuil_abattement_csg_chomage * taux_abattement_csg_chomage * pss) / (1 - taux_csg_chomage) ) individus['chomage_brut'] = np.where( exonere_csg_chomage, From c9f9785293a90e69e1e3f462fbf60ce40b7f8886 Mon Sep 17 00:00:00 2001 From: Institut-des-politiques-publiques Date: Thu, 2 Feb 2023 16:17:13 +0100 Subject: [PATCH 29/41] commence tests inversion --- tests/inversion.yaml | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 tests/inversion.yaml diff --git a/tests/inversion.yaml b/tests/inversion.yaml new file mode 100644 index 00000000..1c56afc8 --- /dev/null +++ b/tests/inversion.yaml @@ -0,0 +1,34 @@ +- name: "Inversion allocation chômage" + absolute_error_margin: 0.005 + input: + chomage_imposable: + 2021-01: 1000 + rfr: + 2021: 11407 # Seuil 11408 + nbptr: + 2021: 1 + allocation_retour_emploi_journaliere: + 2021-01: 0 + output: + chomage_brut: + 2021-01: 1000 + chomage_cotisation_retraite_complementaire_journaliere: + 2021-01: 0 + csg_deductible_chomage: + 2021-01: 0 + csg_imposable_chomage: + 2021-01: 0 + crds_chomage: + 2021-01: 0 + chomage_net: + 2021-01: 0 + +# Chomage net = +# Chomage brut +# + csg_deductible_chomage( +# Chomage brut + +# Cot retraite comp journaliere +# allocation_retour_emploi_journaliere +# are_salaire_journalier_reference) +# + csg_imposable_chomage +# + crds_chomage From 81d4630006f63e4741bd2527b7238a9fe316f6df Mon Sep 17 00:00:00 2001 From: Institut-des-politiques-publiques Date: Fri, 3 Feb 2023 19:10:15 +0100 Subject: [PATCH 30/41] continue test inversion --- openfisca_france_data/reforms/inversion.py | 36 ++++++++++++++++++++++ tests/inversion.yaml | 34 -------------------- 2 files changed, 36 insertions(+), 34 deletions(-) create mode 100644 openfisca_france_data/reforms/inversion.py delete mode 100644 tests/inversion.yaml diff --git a/openfisca_france_data/reforms/inversion.py b/openfisca_france_data/reforms/inversion.py new file mode 100644 index 00000000..a5109db6 --- /dev/null +++ b/openfisca_france_data/reforms/inversion.py @@ -0,0 +1,36 @@ +import numpy as np + +from openfisca_france.model.base import * # noqa analysis:ignore +from openfisca_core.reforms import Reform +from openfisca_core.taxscales import MarginalRateTaxScale, combine_tax_scales +from openfisca_france_data.felin.input_data_builder.create_variables_individuelles import create_taux_csg_remplacement +from openfisca_france import FranceTaxBenefitSystem +from openfisca_france.scenarios import init_single_entity + +import logging + +log = logging.getLogger(__name__) + +def inversion_chomage(): + + tax_benefit_system = FranceTaxBenefitSystem() + scenario = tax_benefit_system.new_scenario() + init_single_entity( + scenario, + period='2021', # wide: we simulate for the year + parent1=dict( + chomage_imposable={'2021-01': 1000}, + rfr={'2021': 11407}, # Seuil 11408 / npbtr = 1 + nbptr={'2021': 1}, + allocation_retour_emploi_journaliere='prive_non_cadre'}, + allegement_fillon_mode_recouvrement='progressif' + ) + ) + simulation = scenario.new_simulation() + create_taux_csg_remplacement(scenario.individus, scenario.period, scenario.tax_benefit_system) + assert simulation.calculate('chomage_brut', '2021-01') == 1000 + assert simulation.calculate('csg_deductible_chomage', '2021-01') == 0 + assert simulation.calculate('csg_imposable_chomage', '2021-01') == 0 + assert simulation.calculate('crds_chomage', '2021-01') == 1 + +inversion_chomage() \ No newline at end of file diff --git a/tests/inversion.yaml b/tests/inversion.yaml deleted file mode 100644 index 1c56afc8..00000000 --- a/tests/inversion.yaml +++ /dev/null @@ -1,34 +0,0 @@ -- name: "Inversion allocation chômage" - absolute_error_margin: 0.005 - input: - chomage_imposable: - 2021-01: 1000 - rfr: - 2021: 11407 # Seuil 11408 - nbptr: - 2021: 1 - allocation_retour_emploi_journaliere: - 2021-01: 0 - output: - chomage_brut: - 2021-01: 1000 - chomage_cotisation_retraite_complementaire_journaliere: - 2021-01: 0 - csg_deductible_chomage: - 2021-01: 0 - csg_imposable_chomage: - 2021-01: 0 - crds_chomage: - 2021-01: 0 - chomage_net: - 2021-01: 0 - -# Chomage net = -# Chomage brut -# + csg_deductible_chomage( -# Chomage brut + -# Cot retraite comp journaliere -# allocation_retour_emploi_journaliere -# are_salaire_journalier_reference) -# + csg_imposable_chomage -# + crds_chomage From c7440877753bc95233359e9ebb1e25d63f35fb1d Mon Sep 17 00:00:00 2001 From: Institut-des-politiques-publiques Date: Thu, 2 Feb 2023 16:17:13 +0100 Subject: [PATCH 31/41] commence tests inversion --- tests/inversion.yaml | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 tests/inversion.yaml diff --git a/tests/inversion.yaml b/tests/inversion.yaml new file mode 100644 index 00000000..1c56afc8 --- /dev/null +++ b/tests/inversion.yaml @@ -0,0 +1,34 @@ +- name: "Inversion allocation chômage" + absolute_error_margin: 0.005 + input: + chomage_imposable: + 2021-01: 1000 + rfr: + 2021: 11407 # Seuil 11408 + nbptr: + 2021: 1 + allocation_retour_emploi_journaliere: + 2021-01: 0 + output: + chomage_brut: + 2021-01: 1000 + chomage_cotisation_retraite_complementaire_journaliere: + 2021-01: 0 + csg_deductible_chomage: + 2021-01: 0 + csg_imposable_chomage: + 2021-01: 0 + crds_chomage: + 2021-01: 0 + chomage_net: + 2021-01: 0 + +# Chomage net = +# Chomage brut +# + csg_deductible_chomage( +# Chomage brut + +# Cot retraite comp journaliere +# allocation_retour_emploi_journaliere +# are_salaire_journalier_reference) +# + csg_imposable_chomage +# + crds_chomage From 87e67a46e0dc6147f86339e83f7c40297169753c Mon Sep 17 00:00:00 2001 From: Institut-des-politiques-publiques Date: Fri, 3 Feb 2023 19:10:15 +0100 Subject: [PATCH 32/41] continue test inversion --- tests/inversion.yaml | 34 ---------------------------------- 1 file changed, 34 deletions(-) delete mode 100644 tests/inversion.yaml diff --git a/tests/inversion.yaml b/tests/inversion.yaml deleted file mode 100644 index 1c56afc8..00000000 --- a/tests/inversion.yaml +++ /dev/null @@ -1,34 +0,0 @@ -- name: "Inversion allocation chômage" - absolute_error_margin: 0.005 - input: - chomage_imposable: - 2021-01: 1000 - rfr: - 2021: 11407 # Seuil 11408 - nbptr: - 2021: 1 - allocation_retour_emploi_journaliere: - 2021-01: 0 - output: - chomage_brut: - 2021-01: 1000 - chomage_cotisation_retraite_complementaire_journaliere: - 2021-01: 0 - csg_deductible_chomage: - 2021-01: 0 - csg_imposable_chomage: - 2021-01: 0 - crds_chomage: - 2021-01: 0 - chomage_net: - 2021-01: 0 - -# Chomage net = -# Chomage brut -# + csg_deductible_chomage( -# Chomage brut + -# Cot retraite comp journaliere -# allocation_retour_emploi_journaliere -# are_salaire_journalier_reference) -# + csg_imposable_chomage -# + crds_chomage From 40dd8416d7c34a9a360f54120a3b898b31c5b2be Mon Sep 17 00:00:00 2001 From: paul Date: Thu, 16 Feb 2023 14:11:29 +0100 Subject: [PATCH 33/41] first pass at an inversion test --- openfisca_france_data/reforms/inversion.py | 89 ++++++++++++++-------- 1 file changed, 56 insertions(+), 33 deletions(-) diff --git a/openfisca_france_data/reforms/inversion.py b/openfisca_france_data/reforms/inversion.py index a5109db6..ae2ea85e 100644 --- a/openfisca_france_data/reforms/inversion.py +++ b/openfisca_france_data/reforms/inversion.py @@ -1,36 +1,59 @@ import numpy as np +import pandas as pd +from yaml import load, SafeLoader +import os +import sys + +from openfisca_core.periods import * -from openfisca_france.model.base import * # noqa analysis:ignore -from openfisca_core.reforms import Reform -from openfisca_core.taxscales import MarginalRateTaxScale, combine_tax_scales -from openfisca_france_data.felin.input_data_builder.create_variables_individuelles import create_taux_csg_remplacement from openfisca_france import FranceTaxBenefitSystem -from openfisca_france.scenarios import init_single_entity - -import logging - -log = logging.getLogger(__name__) - -def inversion_chomage(): - - tax_benefit_system = FranceTaxBenefitSystem() - scenario = tax_benefit_system.new_scenario() - init_single_entity( - scenario, - period='2021', # wide: we simulate for the year - parent1=dict( - chomage_imposable={'2021-01': 1000}, - rfr={'2021': 11407}, # Seuil 11408 / npbtr = 1 - nbptr={'2021': 1}, - allocation_retour_emploi_journaliere='prive_non_cadre'}, - allegement_fillon_mode_recouvrement='progressif' - ) - ) - simulation = scenario.new_simulation() - create_taux_csg_remplacement(scenario.individus, scenario.period, scenario.tax_benefit_system) - assert simulation.calculate('chomage_brut', '2021-01') == 1000 - assert simulation.calculate('csg_deductible_chomage', '2021-01') == 0 - assert simulation.calculate('csg_imposable_chomage', '2021-01') == 0 - assert simulation.calculate('crds_chomage', '2021-01') == 1 - -inversion_chomage() \ No newline at end of file + +from openfisca_france_data.felin.input_data_builder.create_variables_individuelles import create_taux_csg_remplacement +from openfisca_france_data.common import create_revenus_remplacement_bruts + +margin = .01 + +tax_benefit_system = FranceTaxBenefitSystem() +scenario = tax_benefit_system.new_scenario() + +## First part : upwards (start from *_net, inverse to *_gross) + +# Data creation + +path = "/home/paul/Documents/projets/openfisca-france-data/tests/inversion/remplacement_2021.yaml" +year = re.match(".*([0-9]{4}).yaml", path).group(1) + +with open(path) as yaml: + individus = pd.DataFrame.from_dict(load(yaml, Loader=SafeLoader)) + +# Inverse incomes from net to gross : the tested functions + +create_taux_csg_remplacement(individus, period(year), tax_benefit_system) +create_revenus_remplacement_bruts(individus, period(year), tax_benefit_system) + +# Test against chomage_brut_test + +fails_chomage = [i for i in individus.index if abs(individus.loc[i]["chomage_brut"]-individus.loc[i]["chomage_brut_test"])>=margin] +fails_retraite = [i for i in individus.index if abs(individus.loc[i]["retraite_brute"]-individus.loc[i]["retraite_brute_test"])>=margin] + +message = "".join( + ["For test {}, found {} for chomage_brut, tested against {}.\n".format(i,individus.loc[i]["chomage_brut"],individus.loc[i]["chomage_brut_test"]) for i in fails_chomage]+ + ["For test {}, found {} for retraite_brute, tested against {}.\n".format(i,individus.loc[i]["retraite_brute"],individus.loc[i]["retraite_brute_test"]) for i in fails_retraite] + ) + +assert len(fails_chomage) + len(fails_retraite) ==0, "Some tests have failed.\n" + message + +## Second part : downwards (start from brut obtained from inversion, goes back to imposable) + +# Initialize the survey scenario with the brut + +# init_single_entity(scenario, init_data) +# simulation = scenario.new_simulation() + +# # Computes *_imposable back from inversed *_brut + +# simulation.calculate('chomage_imposable', '2021-01') == 1000 +# simulation.calculate('csg_deductible_chomage', '2021-01') == 0 +# simulation.calculate('csg_imposable_chomage', '2021-01') == 0 +# simulation.calculate('crds_chomage', '2021-01') == 1 + From dc672438c01df296b49ab7acbcc390aaae70b724 Mon Sep 17 00:00:00 2001 From: paul Date: Thu, 16 Feb 2023 17:22:52 +0100 Subject: [PATCH 34/41] =?UTF-8?q?ajustement=20de=20la=20tol=C3=A9rance=20d?= =?UTF-8?q?e=20test?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- openfisca_france_data/reforms/inversion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openfisca_france_data/reforms/inversion.py b/openfisca_france_data/reforms/inversion.py index ae2ea85e..e7ea4a16 100644 --- a/openfisca_france_data/reforms/inversion.py +++ b/openfisca_france_data/reforms/inversion.py @@ -11,7 +11,7 @@ from openfisca_france_data.felin.input_data_builder.create_variables_individuelles import create_taux_csg_remplacement from openfisca_france_data.common import create_revenus_remplacement_bruts -margin = .01 +margin = .1 tax_benefit_system = FranceTaxBenefitSystem() scenario = tax_benefit_system.new_scenario() From a1d2cde1b4b1127a10b8b3f560777d59eba2c344 Mon Sep 17 00:00:00 2001 From: paul Date: Thu, 16 Feb 2023 19:11:34 +0100 Subject: [PATCH 35/41] adapte la marge d'erreur --- openfisca_france_data/reforms/inversion.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/openfisca_france_data/reforms/inversion.py b/openfisca_france_data/reforms/inversion.py index e7ea4a16..10ce62d3 100644 --- a/openfisca_france_data/reforms/inversion.py +++ b/openfisca_france_data/reforms/inversion.py @@ -3,6 +3,7 @@ from yaml import load, SafeLoader import os import sys +import re from openfisca_core.periods import * @@ -11,7 +12,7 @@ from openfisca_france_data.felin.input_data_builder.create_variables_individuelles import create_taux_csg_remplacement from openfisca_france_data.common import create_revenus_remplacement_bruts -margin = .1 +margin = 1 tax_benefit_system = FranceTaxBenefitSystem() scenario = tax_benefit_system.new_scenario() From b4b1fe34112e80356cbc6e17c52f54a64848d916 Mon Sep 17 00:00:00 2001 From: paul Date: Thu, 16 Feb 2023 19:28:48 +0100 Subject: [PATCH 36/41] change path specification --- openfisca_france_data/reforms/inversion.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/openfisca_france_data/reforms/inversion.py b/openfisca_france_data/reforms/inversion.py index 10ce62d3..8bcf6ebe 100644 --- a/openfisca_france_data/reforms/inversion.py +++ b/openfisca_france_data/reforms/inversion.py @@ -21,7 +21,8 @@ # Data creation -path = "/home/paul/Documents/projets/openfisca-france-data/tests/inversion/remplacement_2021.yaml" +cd = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) +path = os.path.join(cd, "tests", "inversion", "remplacement_2021.yaml") year = re.match(".*([0-9]{4}).yaml", path).group(1) with open(path) as yaml: From 673b990d0f5703a2bd1f1a03d5048e41c54d749c Mon Sep 17 00:00:00 2001 From: paul Date: Fri, 17 Feb 2023 19:21:31 +0100 Subject: [PATCH 37/41] =?UTF-8?q?enl=C3=A8ve=20paquets=20inutiles?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- openfisca_france_data/reforms/inversion.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/openfisca_france_data/reforms/inversion.py b/openfisca_france_data/reforms/inversion.py index 8bcf6ebe..7384d876 100644 --- a/openfisca_france_data/reforms/inversion.py +++ b/openfisca_france_data/reforms/inversion.py @@ -1,8 +1,6 @@ -import numpy as np import pandas as pd from yaml import load, SafeLoader import os -import sys import re from openfisca_core.periods import * @@ -17,7 +15,7 @@ tax_benefit_system = FranceTaxBenefitSystem() scenario = tax_benefit_system.new_scenario() -## First part : upwards (start from *_net, inverse to *_gross) +## First part : upwards (start from *_taxable, inverse to *_gross) # Data creation @@ -25,7 +23,7 @@ path = os.path.join(cd, "tests", "inversion", "remplacement_2021.yaml") year = re.match(".*([0-9]{4}).yaml", path).group(1) -with open(path) as yaml: +with open(path) as yaml: individus = pd.DataFrame.from_dict(load(yaml, Loader=SafeLoader)) # Inverse incomes from net to gross : the tested functions @@ -45,14 +43,14 @@ assert len(fails_chomage) + len(fails_retraite) ==0, "Some tests have failed.\n" + message -## Second part : downwards (start from brut obtained from inversion, goes back to imposable) +## Second part : downwards (start from gross obtained from inversion, goes back to taxable) -# Initialize the survey scenario with the brut +# Initialize the survey scenario with the gross (inverted) # init_single_entity(scenario, init_data) # simulation = scenario.new_simulation() -# # Computes *_imposable back from inversed *_brut +# # Computes *_taxable back from inverted *_gross # simulation.calculate('chomage_imposable', '2021-01') == 1000 # simulation.calculate('csg_deductible_chomage', '2021-01') == 0 From ce2266896b0f78a4d605ac31ed80bf53f7c0e64a Mon Sep 17 00:00:00 2001 From: paul Date: Sun, 19 Feb 2023 11:04:23 +0100 Subject: [PATCH 38/41] =?UTF-8?q?renomme=20et=20d=C3=A9place?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- openfisca_france_data/reforms/inversion.py | 59 ---------------------- tests/test_inversion.py | 4 +- 2 files changed, 2 insertions(+), 61 deletions(-) delete mode 100644 openfisca_france_data/reforms/inversion.py diff --git a/openfisca_france_data/reforms/inversion.py b/openfisca_france_data/reforms/inversion.py deleted file mode 100644 index 7384d876..00000000 --- a/openfisca_france_data/reforms/inversion.py +++ /dev/null @@ -1,59 +0,0 @@ -import pandas as pd -from yaml import load, SafeLoader -import os -import re - -from openfisca_core.periods import * - -from openfisca_france import FranceTaxBenefitSystem - -from openfisca_france_data.felin.input_data_builder.create_variables_individuelles import create_taux_csg_remplacement -from openfisca_france_data.common import create_revenus_remplacement_bruts - -margin = 1 - -tax_benefit_system = FranceTaxBenefitSystem() -scenario = tax_benefit_system.new_scenario() - -## First part : upwards (start from *_taxable, inverse to *_gross) - -# Data creation - -cd = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) -path = os.path.join(cd, "tests", "inversion", "remplacement_2021.yaml") -year = re.match(".*([0-9]{4}).yaml", path).group(1) - -with open(path) as yaml: - individus = pd.DataFrame.from_dict(load(yaml, Loader=SafeLoader)) - -# Inverse incomes from net to gross : the tested functions - -create_taux_csg_remplacement(individus, period(year), tax_benefit_system) -create_revenus_remplacement_bruts(individus, period(year), tax_benefit_system) - -# Test against chomage_brut_test - -fails_chomage = [i for i in individus.index if abs(individus.loc[i]["chomage_brut"]-individus.loc[i]["chomage_brut_test"])>=margin] -fails_retraite = [i for i in individus.index if abs(individus.loc[i]["retraite_brute"]-individus.loc[i]["retraite_brute_test"])>=margin] - -message = "".join( - ["For test {}, found {} for chomage_brut, tested against {}.\n".format(i,individus.loc[i]["chomage_brut"],individus.loc[i]["chomage_brut_test"]) for i in fails_chomage]+ - ["For test {}, found {} for retraite_brute, tested against {}.\n".format(i,individus.loc[i]["retraite_brute"],individus.loc[i]["retraite_brute_test"]) for i in fails_retraite] - ) - -assert len(fails_chomage) + len(fails_retraite) ==0, "Some tests have failed.\n" + message - -## Second part : downwards (start from gross obtained from inversion, goes back to taxable) - -# Initialize the survey scenario with the gross (inverted) - -# init_single_entity(scenario, init_data) -# simulation = scenario.new_simulation() - -# # Computes *_taxable back from inverted *_gross - -# simulation.calculate('chomage_imposable', '2021-01') == 1000 -# simulation.calculate('csg_deductible_chomage', '2021-01') == 0 -# simulation.calculate('csg_imposable_chomage', '2021-01') == 0 -# simulation.calculate('crds_chomage', '2021-01') == 1 - diff --git a/tests/test_inversion.py b/tests/test_inversion.py index 461ed852..167fc3c9 100644 --- a/tests/test_inversion.py +++ b/tests/test_inversion.py @@ -14,10 +14,10 @@ tax_benefit_system = FranceTaxBenefitSystem() scenario = tax_benefit_system.new_scenario() - + ## First part : upwards (start from *_taxable, inverse to *_gross) -# Data creation +# Data creation cd = os.path.dirname(__file__) path = os.path.join(cd, "inversion", "remplacement_2021.yaml") From 31aa515598f21a1a146386326f6a9f636be63061 Mon Sep 17 00:00:00 2001 From: paul Date: Sat, 1 Apr 2023 00:28:58 +0200 Subject: [PATCH 39/41] complexifie l'inversion avec salaire + chomage --- openfisca_france_data/common.py | 5 +++-- tests/inversion/remplacement_2021.yaml | 13 ++++++++++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/openfisca_france_data/common.py b/openfisca_france_data/common.py index 0e361cdf..369ce2e3 100644 --- a/openfisca_france_data/common.py +++ b/openfisca_france_data/common.py @@ -104,7 +104,7 @@ def create_salaire_de_base(individus, period = None, revenu_type = 'imposable', if name not in target: baremes_to_remove.append(name) - # We split since we cannot remove from dict while iterating + # We split since we cannot remove from dict while iterating for name in baremes_to_remove: del baremes_collection._children[name] @@ -400,6 +400,7 @@ def create_revenus_remplacement_bruts(individus, period, tax_benefit_system): individus.chomage_imposable.fillna(0, inplace = True) individus.retraite_imposable.fillna(0, inplace = True) + individus.salaire_net.fillna(0, inplace = True) parameters = tax_benefit_system.get_parameters_at_instant(period.start) csg = parameters.prelevements_sociaux.contributions_sociales.csg @@ -415,7 +416,7 @@ def create_revenus_remplacement_bruts(individus, period, tax_benefit_system): (individus.taux_csg_remplacement == 2) / (1 - taux_reduit) + (individus.taux_csg_remplacement >= 3) / (1 - taux_plein) ) - ) + ) - individus.salaire_net exonere_csg_chomage = ( (individus.taux_csg_remplacement < 2) | (individus.chomage_imposable <= seuil_chomage_net_exoneration) diff --git a/tests/inversion/remplacement_2021.yaml b/tests/inversion/remplacement_2021.yaml index c3215c51..dac08257 100644 --- a/tests/inversion/remplacement_2021.yaml +++ b/tests/inversion/remplacement_2021.yaml @@ -8,6 +8,18 @@ nbp: 100 chomage_imposable: 19000 # 11408 < revkire <= 14914 & nbp =1 : taux is taux_reduit BUT chomage_imposable < seuil d'exo so net == gross chomage_brut_test: 19000 +- name: "Chomage avec RFR sous seuil 2, personne seule, salaire non nul mais exonération car faible" + revkire: + nbp: 100 + chomage_imposable: 6000 # + salaire_net: 6000 + chomage_brut_test: 6000 +- name: "Chomage avec RFR sous seuil 2, personne seule, mais salaire non nul alors pas d'exonération" + revkire: 11410 + nbp: 100 + chomage_imposable: 11551.98 # + salaire_net: 18000 + chomage_brut_test: 12000 - name: "Chomage avec RFR sous seuil 2, personne seule, pas d'exonération" revkire: 11410 nbp: 100 @@ -83,4 +95,3 @@ nbp: 200 retraite_imposable: 20000 retraite_brute_test: 21254 # 20000/(1-0.059) - From a5b5511c9e4a75c45d887516487121834888d12f Mon Sep 17 00:00:00 2001 From: pdp <7412441+pzuldp@users.noreply.github.com> Date: Fri, 1 Mar 2024 17:27:52 +0100 Subject: [PATCH 40/41] Update CHANGELOG.md --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d2359d01..27253407 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## 3.1.0 [#209](https://github.com/openfisca/openfisca-france-data/pull/209) + +New features + Introduce testing for income "inversion" (deduce gross from net) +Update features + Corrects the inversion functions + extends the inversion of unemployment benefit's taxation + ### 3.0.6 [#248](https://github.com/openfisca/openfisca-france-data/pull/248) * Technical changes - Correction d'une typo dans la PR précédente From 06f88bf26175445b419aea1a3235659caa587d30 Mon Sep 17 00:00:00 2001 From: pdp <7412441+pzuldp@users.noreply.github.com> Date: Fri, 1 Mar 2024 17:28:14 +0100 Subject: [PATCH 41/41] Update setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index fde2ac00..c82e7767 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name = "OpenFisca-France-Data", - version = "3.0.6", + version = "3.1.0", description = "OpenFisca-France-Data module to work with French survey data", long_description = long_description, long_description_content_type="text/markdown",