Skip to content

Commit

Permalink
feat: add agefiph (#111)
Browse files Browse the repository at this point in the history
  • Loading branch information
ColinMaudry authored Aug 10, 2023
1 parent d7cb55a commit dfe47c3
Show file tree
Hide file tree
Showing 22 changed files with 686 additions and 5 deletions.
2 changes: 2 additions & 0 deletions .template.env
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ ANNUAIRE_ENTREPRISES_API_URL=https://recherche-entreprises.api.gouv.fr
### sources
###

AGEFIPH_SERVICES_API_URL=https://www.agefiph.fr/jsonapi/node/aide_service
AGEFIPH_STRUCTURES_FILE_URL=
AIRFLOW_CONN_S3_SOURCES=
BAN_API_URL=https://api-adresse.data.gouv.fr
CD35_FILE_URL=https://data.ille-et-vilaine.fr/dataset/8d5ec0f0-ebe1-442d-9d99-655b37d5ad07/resource/8b781e9d-e11d-486c-98cf-0f63abfae8ed/download/annuaire_sociale_fixe.csv
Expand Down
276 changes: 276 additions & 0 deletions analyse/notebooks/agefiph/api-services.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,276 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "45495455",
"metadata": {},
"source": [
"Le but de ce notebook est de générer des structures et services pour l'offre de service de l'agefiph.\n",
"\n",
"Ce notebook prend 2 sources de données :\n",
"\n",
"* une liste de structures issue d'un tableau Grist maintenu par data.inclusion,\n",
"* une liste de services issue de l'API de l'agefiph.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "517ed275",
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import numpy as np\n",
"import pandas as pd\n",
"import trafilatura\n",
"import hashlib\n",
"from uuid import UUID\n",
"\n",
"\n",
"def html_to_markdown(s: str):\n",
" if s is None or s == \"\":\n",
" return s\n",
" return trafilatura.extract(trafilatura.load_html(\"<html>\" + s + \"</html>\"))\n",
"\n",
"\n",
"# https://grist.incubateur.net/o/datainclusion/bWqnEafQaLgc/Partage-de-donnes-AGEFIPH-Mars-Avril-2023/p/4\n",
"STRUCTURES_TABLE_URL = \"https://grist.incubateur.net/o/datainclusion/api/docs/bWqnEafQaLgcTvFv7rv6hF/download/csv?tableId=Structures\"\n",
"SERVICES_API_URL = \"https://www.agefiph.fr/jsonapi/node/aide_service\"\n",
"\n",
"HEADERS = {\"User-Agent\": \"data-inclusion\"}\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5d20e16b",
"metadata": {},
"outputs": [],
"source": [
"raw_structures_df = pd.read_csv(STRUCTURES_TABLE_URL, dtype=str).replace([np.nan, \"\"], None)\n",
"raw_services_df = pd.json_normalize(\n",
" requests.get(SERVICES_API_URL, headers=HEADERS).json()[\"data\"]\n",
").replace([np.nan, \"\"], None)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "15a0ca22",
"metadata": {},
"outputs": [],
"source": [
"raw_structures_df = raw_structures_df\n",
"\n",
"raw_structures_df.info()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7bd7e018",
"metadata": {},
"outputs": [],
"source": [
"raw_services_df = raw_services_df[\n",
" [\n",
" \"id\",\n",
" \"attributes.created\",\n",
" \"attributes.changed\",\n",
" \"attributes.title\",\n",
" \"attributes.field_titre_card_employeur\",\n",
" \"attributes.field_essentiel_ph.processed\",\n",
" \"attributes.field_essentiel_employeur.processed\",\n",
" \"attributes.field_texte_brut_long\",\n",
" \"relationships.field_thematique.data\",\n",
" ]\n",
"]\n",
"\n",
"raw_services_df.info()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d5ad9896",
"metadata": {},
"outputs": [],
"source": [
"pd.json_normalize(\n",
" raw_services_df.rename(columns={\"id\": \"service_id\"}).to_dict(orient=\"records\"),\n",
" record_path=\"relationships.field_thematique.data\",\n",
" meta=\"service_id\",\n",
").info()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e2ad9881",
"metadata": {},
"outputs": [],
"source": [
"DI_THEMATIQUES_BY_AGEFIPH_THEMATIQUE_ID = {\n",
" # Source https://grist.incubateur.net/o/datainclusion/uVsB8pabQGoe/Thmatiques/p/13\n",
" \"4e08047f-b0ed-431a-9182-61e8e61b1486\": \"handicap--favoriser-le-retour-et-le-maintien-dans-lemploi\",\n",
" \"11618ce3-e59b-404f-8eb2-5763215464f2\": \"handicap--favoriser-le-retour-et-le-maintien-dans-lemploi\",\n",
" \"60c25ci7-61sc-89a9-ny54-126hslf808a2\": \"handicap--connaissance-des-droits-des-travailleurs\",\n",
" \"51be0003-13d8-4ffa-9923-248e7aa4a227\": None,\n",
" \"ddf0fa87-2ee0-481c-a258-96985b7826c3\": None,\n",
" \"cb2c9fec-c190-4e2f-aeee-6da818109bf8\": \"handicap--favoriser-le-retour-et-le-maintien-dans-lemploi\",\n",
" \"78b28acb-803e-4b06-ab77-58dabfbd8571\": \"handicap--adaptation-au-poste-de-travail\",\n",
" \"366eb399-1e6c-4609-8066-d1504fae2a8e\": None,\n",
" \"907a8c33-5c56-49d3-bd64-a736a9ceac76\": None,\n",
" \"5d8c88d8-db03-4f27-b517-d7016896b01a\": None,\n",
" \"fb5e6180-290b-4216-ba68-624d25defa3a\": \"handicap--favoriser-le-retour-et-le-maintien-dans-lemploi\",\n",
" \"03228d62-2a59-49d8-8443-b25cb2e684b9\": \"accompagnement-social-et-professionnel-personnalise--definition-du-projet-professionnel\",\n",
" \"f9ab3e06-af51-463a-aaf7-7b04a28e047f\": \"se-former--trouver-sa-formation\",\n",
" \"aeab1d68-4e89-4e2a-a612-d8645e3999d8\": \"creation-activite--definir-son-projet-de-creation-dentreprise\",\n",
" \"f4551558-8315-4708-8357-5ecc89751bc6\": \"handicap--faire-reconnaitre-un-handicap\",\n",
" \"4b8b0473-52c2-4a21-956d-d7d68a7053b5\": None,\n",
"}\n",
"\n",
"\n",
"def map_service(row) -> dict:\n",
" service = {}\n",
" service[\"id\"] = row[\"id\"]\n",
" service[\"date_creation\"] = row[\"attributes.created\"]\n",
" service[\"date_maj\"] = row[\"attributes.changed\"]\n",
" service[\"nom\"] = row[\"attributes.title\"]\n",
" service[\"contact_public\"] = True\n",
" service[\"presentation_resume\"] = row[\"attributes.field_titre_card_employeur\"]\n",
"\n",
" service[\"presentation_detail\"] = \"\"\n",
" if row[\"attributes.field_essentiel_ph.processed\"] is not None:\n",
" service[\"presentation_detail\"] += (\n",
" \"<p>Pour la personne handicapée :</p>\"\n",
" + row[\"attributes.field_essentiel_ph.processed\"]\n",
" )\n",
" if row[\"attributes.field_essentiel_employeur.processed\"] is not None:\n",
" service[\"presentation_detail\"] += (\n",
" \"<p>Pour l'employeur :</p>\"\n",
" + row[\"attributes.field_essentiel_employeur.processed\"]\n",
" )\n",
" if row[\"attributes.field_texte_brut_long\"] is not None:\n",
" service[\"presentation_detail\"] = (\n",
" row[\"attributes.field_texte_brut_long\"] + service[\"presentation_detail\"]\n",
" )\n",
" service[\"presentation_detail\"] = html_to_markdown(service[\"presentation_detail\"])\n",
" service[\"presentation_detail\"] = service[\"presentation_detail\"] or None\n",
"\n",
" service[\"thematiques\"] = list(\n",
" set(\n",
" [\n",
" v\n",
" for v in [\n",
" DI_THEMATIQUES_BY_AGEFIPH_THEMATIQUE_ID[\n",
" agefiph_thematique_data[\"id\"]\n",
" ]\n",
" for agefiph_thematique_data in row[\n",
" \"relationships.field_thematique.data\"\n",
" ]\n",
" ]\n",
" if v is not None\n",
" ]\n",
" )\n",
" )\n",
"\n",
" return service\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9f98dd4d",
"metadata": {},
"outputs": [],
"source": [
"template_services_df = raw_services_df.apply(map_service, axis=1, result_type=\"expand\")\n",
"\n",
"template_services_df.info()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e0680158",
"metadata": {},
"outputs": [],
"source": [
"# Cartesian products\n",
"services_df = (\n",
" raw_structures_df[\n",
" [\n",
" \"id\",\n",
" \"courriel\",\n",
" \"telephone\",\n",
" \"adresse\",\n",
" \"commune\",\n",
" \"code_postal\",\n",
" \"code_insee\",\n",
" ]\n",
" ]\n",
" .rename(columns={\"id\": \"structure_id\"})\n",
" .join(template_services_df, how=\"cross\")\n",
")\n",
"\n",
"# Making service id unique across all regions\n",
"services_df = services_df.assign(\n",
" id=services_df.apply(\n",
" lambda row: str(\n",
" UUID(\n",
" hex=hashlib.md5((row[\"structure_id\"] + row[\"id\"]).encode()).hexdigest()\n",
" )\n",
" ),\n",
" axis=1,\n",
" )\n",
")\n",
"\n",
"services_df.info()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ecfc54c4",
"metadata": {},
"outputs": [],
"source": [
"services_df.to_json(\"services.json\", orient=\"records\", force_ascii=False)\n",
"raw_structures_df.to_json(\"structures.json\", orient=\"records\", force_ascii=False)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "85c609c1",
"metadata": {},
"outputs": [],
"source": [
"services_df\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
21 changes: 21 additions & 0 deletions api/src/data_inclusion/api/entrypoints/fastapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ def create_app() -> fastapi.FastAPI:


def list_structures(
request: fastapi.Request,
db_session: orm.Session,
source: Optional[str] = None,
id_: Optional[str] = None,
Expand All @@ -108,6 +109,10 @@ def list_structures(
if source is not None:
query = query.filter_by(source=source)

# FIXME: temporary hack - structures from the "agefiph" source are hidden from every user except "dora-staging-stream"
if request.user.username != "dora-staging-stream":
query = query.filter(models.Structure.source != "agefiph")

if id_ is not None:
query = query.filter_by(id=id_)

Expand Down Expand Up @@ -168,6 +173,7 @@ def list_structures(
summary="Lister les structures consolidées",
)
def list_structures_endpoint(
request: fastapi.Request,
source: Annotated[str | SkipJsonSchema[None], fastapi.Query()] = None,
id: Annotated[str | SkipJsonSchema[None], fastapi.Query()] = None,
typologie: Annotated[
Expand Down Expand Up @@ -210,6 +216,7 @@ def list_structures_endpoint(
"""

return list_structures(
request,
db_session,
source=source,
id_=id,
Expand Down Expand Up @@ -265,6 +272,7 @@ def list_sources_endpoint(


def list_services(
request: fastapi.Request,
db_session: orm.Session,
source: Optional[str] = None,
thematique: Optional[schema.Thematique] = None,
Expand All @@ -281,6 +289,10 @@ def list_services(
if source is not None:
query = query.filter(models.Structure.source == source)

# FIXME: temporary hack - services from the "agefiph" source are hidden from every user except "dora-staging-stream"
if request.user.username != "dora-staging-stream":
query = query.filter(models.Service.source != "agefiph")

if departement is not None:
query = query.filter(
sqla.or_(
Expand Down Expand Up @@ -339,6 +351,7 @@ def list_services(
summary="Lister les services consolidées",
)
def list_services_endpoint(
request: fastapi.Request,
db_session=fastapi.Depends(db.get_session),
source: Annotated[str | SkipJsonSchema[None], fastapi.Query()] = None,
thematique: Annotated[
Expand All @@ -355,6 +368,7 @@ def list_services_endpoint(
] = None,
):
return list_services(
request,
db_session,
source=source,
thematique=thematique,
Expand Down Expand Up @@ -388,6 +402,7 @@ def retrieve_service_endpoint(


def search_services(
request: fastapi.Request,
db_session: orm.Session,
sources: Optional[list[str]] = None,
commune_instance: Optional[models.Commune] = None,
Expand All @@ -404,6 +419,10 @@ def search_services(
if sources is not None:
query = query.filter(models.Service.source == sqla.any_(sqla.literal(sources)))

# FIXME: temporary hack - services from the "agefiph" source are hidden from every user except "dora-staging-stream"
if request.user.username != "dora-staging-stream":
query = query.filter(models.Service.source != "agefiph")

if commune_instance is not None:
# filter by zone de diffusion
query = query.filter(
Expand Down Expand Up @@ -560,6 +579,7 @@ def _items_to_mappings(items: list) -> list[dict]:
summary="Rechercher des services",
)
def search_services_endpoint(
request: fastapi.Request,
db_session=fastapi.Depends(db.get_session),
source: Annotated[
str | SkipJsonSchema[None],
Expand Down Expand Up @@ -645,6 +665,7 @@ def search_services_endpoint(
sources = [source]

return search_services(
request,
db_session,
sources=sources,
commune_instance=commune_instance,
Expand Down
2 changes: 2 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ x-airflow-common:
AIRFLOW_CONN_PG: postgresql://data-inclusion:data-inclusion@target-db:5432/data-inclusion

# Variables
AIRFLOW_VAR_AGEFIPH_SERVICES_API_URL: ${AGEFIPH_SERVICES_API_URL}
AIRFLOW_VAR_AGEFIPH_STRUCTURES_FILE_URL: ${AGEFIPH_STRUCTURES_FILE_URL}
AIRFLOW_VAR_DBT_PROJECT_DIR: /opt/airflow/dbt
AIRFLOW_VAR_BAN_API_URL: ${BAN_API_URL}
AIRFLOW_VAR_CD35_FILE_URL: ${CD35_FILE_URL}
Expand Down
Loading

0 comments on commit dfe47c3

Please sign in to comment.