Skip to content

Commit

Permalink
unify path_within_bucket and environment constants
Browse files Browse the repository at this point in the history
* remove PATH_WRITING_S3 from env variables in argo
* deduce that path from the ENVIRONMENT constant
* and enable the debug mode when ENVIRONMENT is set to "test" (instead of "dev" previously)
  • Loading branch information
tgrandje committed Dec 20, 2024
1 parent 39b0192 commit 8ffff59
Show file tree
Hide file tree
Showing 9 changed files with 29 additions and 42 deletions.
15 changes: 7 additions & 8 deletions argo-pipeline/pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ spec:
command: [sh, -c]
args: ["mkdir -p /mnt/bin/src ;
mv /mnt/bin/argo-pipeline/src/* /mnt/bin/src ;
echo $PATH_WRITING_S3;
echo $ENVIRONMENT;
"]
volumeMounts:
- name: volume-workflow-tmp
Expand All @@ -134,11 +134,10 @@ spec:
value: minio.lab.sspcloud.fr
- name: MC_HOST_s3
value: https://$AWS_ACCESS_KEY_ID:$AWS_SECRET_ACCESS_KEY@$AWS_S3_ENDPOINT
- name: PATH_WRITING_S3
value: "test"
- name: ENVIRONMENT
# set value to "dev" to simplify pipeline execution (2 years, only topojson, etc.), use "preprod" or "prod" else
value: dev
# set value to "test" to simplify pipeline execution (2 years, only topojson, etc.); use "preprod" or "prod" otherwise
# -> this value is also used as the path_within_bucket constant (the path written to inside the S3 bucket)
value: test

- name: download-all-sources
outputs:
Expand All @@ -150,7 +149,7 @@ spec:
image: inseefrlab/cartiflette:latest
command: [sh, -c]
args: ["
python /mnt/bin/src/download_all_sources.py --path $PATH_WRITING_S3;
python /mnt/bin/src/download_all_sources.py;
"]
volumeMounts:
- name: volume-workflow-tmp
Expand Down Expand Up @@ -196,7 +195,7 @@ spec:
- name: volume-workflow-tmp
mountPath: /mnt
args: ["
python /mnt/bin/src/make_geodata_datasets.py --path $PATH_WRITING_S3 --year '{{inputs.parameters.year}}';
python /mnt/bin/src/make_geodata_datasets.py --year '{{inputs.parameters.year}}';
"]
env: *env_parameters
Expand All @@ -216,7 +215,7 @@ spec:
- name: volume-workflow-tmp
mountPath: /mnt
args: ["
python /mnt/bin/src/make_metadata_datasets.py --path $PATH_WRITING_S3 --years '{{inputs.parameters.years}}';
python /mnt/bin/src/make_metadata_datasets.py --years '{{inputs.parameters.years}}';
"]
env: *env_parameters
Expand Down
1 change: 0 additions & 1 deletion argo-pipeline/src/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
Create cartiflette's catalog
"""

import json
import logging

from s3fs import S3FileSystem
Expand Down
12 changes: 5 additions & 7 deletions argo-pipeline/src/download_all_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,6 @@
parser = argparse.ArgumentParser(
description="Run Cartiflette pipeline download script."
)
parser.add_argument(
"-p", "--path", help="Path within bucket", default=PATH_WITHIN_BUCKET
)

default_years = ",".join(str(x) for x in range(2020, date.today().year + 1))
parser.add_argument(
Expand All @@ -63,12 +60,13 @@
args = parser.parse_args()

bucket = BUCKET
path_within_bucket = args.path
years = args.years
skip = args.skip

if os.environ.get("ENVIRONMENT", None) == "dev":
logging.warning("dev environment -> restrict download to 2023 & 2024 only")
if os.environ.get("ENVIRONMENT", None) == "test":
logging.warning(
"test environment -> restrict download to 2023 & 2024 only"
)
years = "2023,2024"

if years:
Expand All @@ -80,7 +78,7 @@
try:
if not skip:
results = download_all(
bucket, path_within_bucket, fs=fs, upload=True, years=years
bucket, PATH_WITHIN_BUCKET, fs=fs, upload=True, years=years
)
else:
results = dict()
Expand Down
4 changes: 2 additions & 2 deletions argo-pipeline/src/filter_vintages_operationnal.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@

years = sorted(list(years_geodatasets | years_metadata))

if os.environ.get("ENVIRONMENT", None) == "dev":
logging.warning("dev environment -> restrict generation to 2023, 2024 ")
if os.environ.get("ENVIRONMENT", None) == "test":
logging.warning("test environment -> restrict generation to 2023, 2024 ")
years = [2023, 2024]

logger.info(
Expand Down
9 changes: 2 additions & 7 deletions argo-pipeline/src/make_geodata_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,6 @@
parser = argparse.ArgumentParser(
description="Preprocess geodatasets from raw sources"
)
parser.add_argument(
"-p", "--path", help="Path within bucket", default=PATH_WITHIN_BUCKET
)

parser.add_argument(
"-y", "--year", help="Vintage to perform computation on", default="2023"
)
Expand All @@ -54,7 +50,6 @@

# Parse arguments
args = parser.parse_args()
path_within_bucket = args.path
year = args.year
simplifications = args.simplify

Expand All @@ -63,10 +58,10 @@


def main(
path_within_bucket,
simplifications: List[int],
bucket=BUCKET,
year: int = None,
path_within_bucket: str = PATH_WITHIN_BUCKET,
):

created = create_one_year_geodataset_batch(
Expand All @@ -90,4 +85,4 @@ def main(


if __name__ == "__main__":
data = main(path_within_bucket, simplifications=simplifications, year=year)
data = main(simplifications=simplifications, year=year)
10 changes: 3 additions & 7 deletions argo-pipeline/src/make_metadata_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,6 @@
parser = argparse.ArgumentParser(
description="Preprocess metadata from raw sources"
)
parser.add_argument(
"-p", "--path", help="Path within bucket", default=PATH_WITHIN_BUCKET
)

parser.add_argument(
"-y", "--years", help="Vintage to perform computation on", default="[]"
Expand All @@ -41,7 +38,6 @@
args = parser.parse_args()

bucket = BUCKET
path_within_bucket = args.path
years = args.years

years = json.loads(years)
Expand All @@ -50,8 +46,8 @@


def main(
path_within_bucket,
bucket=BUCKET,
path_within_bucket: str = PATH_WITHIN_BUCKET,
bucket: str = BUCKET,
years: int = None,
):

Expand Down Expand Up @@ -125,4 +121,4 @@ def main(


if __name__ == "__main__":
data = main(path_within_bucket, years=years)
data = main(years=years)
8 changes: 4 additions & 4 deletions argo-pipeline/src/select_downstream_vintage_to_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@
# {"IGN": {"ADMINEXPRESS": {"EXPRESS-COG-TERRITOIRE": {"guadeloupe": {"2024": {"downloaded": true, "paths": {"COMMUNE": ["projet-cartiflette/test/provider=IGN/dataset_family=ADMINEXPRESS/source=EXPRESS-COG-TERRITOIRE/year=2024/administrative_level=None/crs=5490/origin=raw/vectorfile_format=shp/territory=guadeloupe/simplification=0/COMMUNE.shp"]}}}, "martinique": {"2024": {"downloaded": true, "paths": {"COMMUNE": ["projet-cartiflette/test/provider=IGN/dataset_family=ADMINEXPRESS/source=EXPRESS-COG-TERRITOIRE/year=2024/administrative_level=None/crs=5490/origin=raw/vectorfile_format=shp/territory=martinique/simplification=0/COMMUNE.shp"]}}}, "guyane": {"2024": {"downloaded": true, "paths": {"COMMUNE": ["projet-cartiflette/test/provider=IGN/dataset_family=ADMINEXPRESS/source=EXPRESS-COG-TERRITOIRE/year=2024/administrative_level=None/crs=2972/origin=raw/vectorfile_format=shp/territory=guyane/simplification=0/COMMUNE.shp"]}}}, "reunion": {"2024": {"downloaded": true, "paths": {"COMMUNE": ["projet-cartiflette/test/provider=IGN/dataset_family=ADMINEXPRESS/source=EXPRESS-COG-TERRITOIRE/year=2024/administrative_level=None/crs=2975/origin=raw/vectorfile_format=shp/territory=reunion/simplification=0/COMMUNE.shp"]}}}, "mayotte": {"2024": {"downloaded": true, "paths": {"COMMUNE": ["projet-cartiflette/test/provider=IGN/dataset_family=ADMINEXPRESS/source=EXPRESS-COG-TERRITOIRE/year=2024/administrative_level=None/crs=4326/origin=raw/vectorfile_format=shp/territory=mayotte/simplification=0/COMMUNE.shp"]}}}, "metropole": {"2024": {"downloaded": true, "paths": {"COMMUNE": ["projet-cartiflette/test/provider=IGN/dataset_family=ADMINEXPRESS/source=EXPRESS-COG-TERRITOIRE/year=2024/administrative_level=None/crs=2154/origin=raw/vectorfile_format=shp/territory=metropole/simplification=0/COMMUNE.shp"]}}}}}}, "Insee": {"COG": {"DEPARTEMENT": {"france_entiere": {"2024": {"downloaded": false, "paths": null}}}, "REGION": {"france_entiere": {"2024": {"downloaded": false, "paths": null}}}}, "TAGC": 
{"APPARTENANCE": {"france_entiere": {"2024": {"downloaded": true, "paths": {"table-appartenance-geo-communes-2024": ["projet-cartiflette/test/provider=Insee/dataset_family=TAGC/source=APPARTENANCE/year=2024/administrative_level=None/crs=None/origin=raw/vectorfile_format=xlsx/territory=france_entiere/simplification=0/table-appartenance-geo-communes-2024.xlsx"]}}}}}}}


if os.environ.get("ENVIRONMENT", None) == "dev":
logging.warning("dev environment -> force generation of only 2023 & 2024")
if os.environ.get("ENVIRONMENT", None) == "test":
logging.warning("test environment -> force generation of only 2023 & 2024")


def store_to_json(name, years):
Expand All @@ -55,7 +55,7 @@ def store_to_json(name, years):

def filter_geodata(results):
"filter the downloaded vintages of geodatasets"
if os.environ.get("ENVIRONMENT", None) == "dev":
if os.environ.get("ENVIRONMENT", None) == "test":
return store_to_json("geodatasets_years.json", [2023, 2024])

years = set()
Expand All @@ -81,7 +81,7 @@ def filter_geodata(results):

def filter_metadata(results):
"filter the downloaded vintages of metadatasets"
if os.environ.get("ENVIRONMENT", None) == "dev":
if os.environ.get("ENVIRONMENT", None) == "test":
return store_to_json("metadata_years.json", [2023, 2024])

years = set()
Expand Down
10 changes: 5 additions & 5 deletions cartiflette/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
load_dotenv(override=True)

BUCKET = "projet-cartiflette"
PATH_WITHIN_BUCKET = "test"
PATH_WITHIN_BUCKET = os.environ.get("ENVIRONMENT", "test")
ENDPOINT_URL = "https://minio.lab.sspcloud.fr"

kwargs = {}
Expand All @@ -31,13 +31,13 @@
# PIPELINE CONFIG
# =============================================================================

# set to low resolution datasets for dev environment, high for anything else
# use low resolution datasets in the test environment, high resolution for anything else
INTERMEDIATE_FORMAT = "geojson"
DATASETS_HIGH_RESOLUTION = os.environ.get("ENVIRONMENT", "dev") != "dev"
MAPSHAPER_QUIET = os.environ.get("ENVIRONMENT", "dev") != "dev"
DATASETS_HIGH_RESOLUTION = os.environ.get("ENVIRONMENT", "test") != "test"
MAPSHAPER_QUIET = os.environ.get("ENVIRONMENT", "test") != "test"

if not DATASETS_HIGH_RESOLUTION:
warnings.warn(
"cartiflette is running with dev configuration, using only low "
"cartiflette is running with test configuration, using only low "
"resolution datasets"
)
2 changes: 1 addition & 1 deletion cartiflette/pipeline_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@
"POPULATION-COM": ["Insee", "POPULATION", "POPULATION-IRIS-COM"],
}

if os.environ.get("ENVIRONMENT", "dev") != "dev":
if os.environ.get("ENVIRONMENT", "test") != "test":
PIPELINE_CRS = [2154, 4326, 3857]
PIPELINE_SIMPLIFICATION_LEVELS = [100, 40]
PIPELINE_FORMATS = ["geojson", "topojson", "gpkg"]
Expand Down

0 comments on commit 8ffff59

Please sign in to comment.