Skip to content

Commit

Permalink
Calage sur la structure d'âge
Browse files Browse the repository at this point in the history
  • Loading branch information
sylvainipp committed Aug 27, 2024
1 parent 4e8141f commit 65d3a2b
Show file tree
Hide file tree
Showing 5 changed files with 126 additions and 2 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Le tableur est issu de https://www.insee.fr/fr/statistiques/5894083?sommaire=5760764 (onglet « population »).
Quelques modifications ont été apportées pour faciliter sa lecture automatique : conversion en CSV, séparateur des milliers changé de " " en "." (dans Fichier/Options/Options avancées/Utiliser les séparateurs systèmes/Séparateur des milliers), et modalité « 105 + » remplacée par « 105 ».
1 change: 1 addition & 0 deletions openfisca_france_data/erfs_fpr/scenario.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ class ErfsFprSurveyScenario(AbstractErfsSurveyScenario):
# Les variables OpenFisca qu'on va utiliser avec les données en entrée.
used_as_input_variables = [
"activite",
"age",
#"autonomie_financiere",
"categorie_salarie",
"categorie_non_salarie",
Expand Down
47 changes: 45 additions & 2 deletions openfisca_france_data/model/calage.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
from itertools import izip
#from itertools import izip

from numpy import arange, array, floor, where
from numpy import minimum as min_
import pandas as pd
from pathlib import Path

from openfisca_france_data.model.base import * # noqa

from openfisca_france_data import openfisca_france_data_location

class nbinde(Variable):
value_type = int
Expand Down Expand Up @@ -227,3 +230,43 @@ def formula(menage, period, parameters):
# ratio = (( (typmen15!=res)).sum())/((typmen15!=0).sum())
# ratio 2.7 % d'erreurs enfant non nés et erreur d'enfants
return res

def create_dic_calage(year, base_year, liste_variable, **kwargs):
    """Build the dictionary of calibration targets for the requested variables.

    Args:
        year: Target year, forwarded to the per-variable builders.
        base_year: Reference year, forwarded to the per-variable builders.
        liste_variable: Names of the variables to calibrate on. Only
            ``"age"`` is currently supported.
        **kwargs: Extra keyword arguments forwarded unchanged to
            ``create_dic_age``.

    Returns:
        The dictionary of targets; empty when ``"age"`` is not requested.

    Raises:
        AssertionError: If a requested variable has no implemented aggregate.
    """
    variables_implementees = ["age"]
    assert all(variable in variables_implementees for variable in liste_variable), "A variable is asked to be calibrated, but no matching aggregate is implemented"

    cibles = {}
    if "age" not in liste_variable:
        return cibles
    return create_dic_age(year, base_year, cibles, **kwargs)

def create_dic_age(year, base_year, dico_calage, base_initiale = None):
    """Add age-structure calibration targets to ``dico_calage``.

    The targets are derived from the demography CSV shipped under
    ``assets/aggregats/demographie/demographie_insee.csv``. Ages of 100 and
    above are pooled into the single age 100.

    Args:
        year: Target year; ``str(year)`` must be a column of the CSV.
        base_year: Reference year; every year from ``base_year`` to ``year``
            (inclusive) must be a column of the CSV.
        dico_calage: Dictionary that receives the ``"age_calage"`` entry. It
            is updated in place and also returned.
        base_initiale: Optional DataFrame with an ``"age_calage"`` column and
            a count column ``"N"``. When given, the target for each age is
            ``N`` scaled by the demographic growth between ``base_year`` and
            ``year``. When omitted, the raw counts of ``year`` are used.
            The DataFrame is not modified.

    Returns:
        ``dico_calage`` with ``dico_calage["age_calage"]`` set to a dict
        mapping each age to its target count.
    """
    demographie_file = Path(
        openfisca_france_data_location,
        "openfisca_france_data",
        "assets",
        "aggregats",
        "demographie",
        "demographie_insee.csv"
        )
    # ``skipfooter`` is only implemented by the python parser: request it
    # explicitly instead of relying on the implicit fallback (ParserWarning).
    df_demographie = pd.read_csv(
        demographie_file,
        skipfooter = 9,
        skiprows = 1,
        encoding_errors = 'replace',
        sep = ';',
        thousands = ".",
        engine = "python",
        )
    # The age column is addressed by position because its header contains a
    # mis-encoded character.
    df_demographie["age_calage"] = df_demographie[df_demographie.columns[0]].astype(int)
    # Cap the age at 100 to avoid ages with no data.
    for annee in range(base_year, year + 1):
        df_demographie.loc[df_demographie["age_calage"] == 100, str(annee)] = sum(df_demographie[df_demographie["age_calage"] >= 100][str(annee)])
    # Explicit copy: a column is added to this frame below, which must not be
    # a chained assignment on a filtered view.
    df_demographie = df_demographie[df_demographie["age_calage"] <= 100].copy()

    if base_initiale is None:
        dic_age = pd.Series(df_demographie[str(year)].values, index = df_demographie["age_calage"]).to_dict()
        dico_calage["age_calage"] = dic_age
        return dico_calage

    df_demographie["variation_totale"] = df_demographie[str(year)] / df_demographie[str(base_year)]

    # Work on a private copy so the caller's DataFrame is left untouched.
    base = base_initiale.copy()
    base["age_calage"] = base["age_calage"].astype(int)
    base.loc[base["age_calage"] == 100, "N"] = sum(base[base["age_calage"] >= 100]["N"])
    base = base[base["age_calage"] <= 100]

    # Inner join: only ages present in both sources get a target.
    cible_finale = pd.merge(df_demographie, base, on = "age_calage", how = "inner")
    cible_finale["cible"] = (cible_finale["N"] * cible_finale["variation_totale"]).round()
    dic_age = pd.Series(cible_finale["cible"].values, index = cible_finale["age_calage"]).to_dict()
    dico_calage["age_calage"] = dic_age
    return dico_calage


31 changes: 31 additions & 0 deletions openfisca_france_data/reforms/variables_calibration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import numpy as np
from numpy import minimum as min_

from openfisca_france.model.base import * # noqa analysis:ignore
from openfisca_core.reforms import Reform

import logging

log = logging.getLogger(__name__)
def create_calibration_tax_benefit_system(survey_scenario = None):
    """Return a reform that adds the variables used for calibration.

    Args:
        survey_scenario: Object handed as sole argument to the ``Reform``
            constructor. NOTE(review): despite its name this looks like the
            baseline tax-benefit system — confirm against callers.

    Returns:
        An instance of the ``variables_pour_calibration`` reform, which
        declares the ``age_calage`` variable.
    """

    class age_calage(Variable):
        value_type = str
        entity = Individu
        label = "Age censuré pour calage"
        definition_period = YEAR
        unit = 'years'
        is_period_size_independent = True
        set_input = set_input_dispatch_by_period

        def formula(individu, period):
            # Age taken on the first month of the period, top-coded at 100.
            age_plafond = 100
            return min_(individu('age', period.first_month), age_plafond)

    class variables_pour_calibration(Reform):
        name = 'variables_pour_calibration'

        def apply(self):
            self.add_variable(age_calage)

    return variables_pour_calibration(survey_scenario)
47 changes: 47 additions & 0 deletions tests/test_calage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import os

import pytest

from openfisca_survey_manager.calibration import Calibration # type: ignore
from openfisca_france_data import openfisca_france_data_location
from openfisca_france_data.model.calage import create_dic_calage
from openfisca_france_data.reforms.variables_calibration import create_calibration_tax_benefit_system
from openfisca_france_data.erfs_fpr.scenario import ( # type: ignore
ErfsFprSurveyScenario,
)
from openfisca_france_data import france_data_tax_benefit_system

@pytest.fixture
def location() -> str:
    """Fixture returning ``openfisca_france_data_location`` as imported from the package."""
    package_location = openfisca_france_data_location
    return package_location


def test_calage(survey_scenario, fake_input_data, location, year: int = 2015):
    """Check that the summed ``wprm`` weights are larger for ``year`` than for the base year after calibration on age."""
    base_year = 2013

    # Build the scenario on the tax-benefit system extended with the
    # calibration variables; this replaces the value of the fixture argument.
    survey_scenario = ErfsFprSurveyScenario.create(
        tax_benefit_system = create_calibration_tax_benefit_system(france_data_tax_benefit_system),
        period = year,
        )

    # Load the input data.
    input_data = fake_input_data(year)

    # Calibration settings and age targets.
    calibration_kwargs = {
        'target_margins_by_variable': create_dic_calage(year, base_year, ["age"]),
        'parameters': {
            "method": "logit",
            "invlo": 3,
            "up": 3,
            },
        }

    # Initialise the scenario with the data; the calibration settings are passed along.
    survey_scenario.init_from_data(
        data = {"input_data_frame": input_data},
        calibration_kwargs = calibration_kwargs,
        )

    weight_base = sum(survey_scenario.calculate_variable("wprm", period = base_year))
    weight_year = sum(survey_scenario.calculate_variable("wprm", period = year))

    assert weight_base < weight_year

0 comments on commit 65d3a2b

Please sign in to comment.