-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSnakefile
executable file
·135 lines (109 loc) · 3.53 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import json
import shutil
from datetime import datetime
from pathlib import Path
from glob import glob
import geopandas
import irv_datapkg
import pandas
import requests
import shapely
# Data package version stamp; not referenced in this file — presumably
# consumed by the included rule files / datapackage script (TODO confirm).
DATAPKG_VERSION = "0.2.0"
# Zenodo host for deposits; swap in the sandbox host when testing uploads.
# ZENODO_URL = "sandbox.zenodo.org"
ZENODO_URL = "zenodo.org"
# Boundary table read from the working directory (irv_datapkg helper).
BOUNDARIES = irv_datapkg.read_boundaries(Path("."))
# Same table re-indexed by three-letter boundary code for fast lookups.
BOUNDARY_LU = BOUNDARIES.set_index("CODE_A3")
# Required environment variables: Snakemake fails fast if any are unset.
# ZENODO_TOKEN authorises deposits; CDSAPI_* look like Copernicus Climate
# Data Store credentials — used by included rules, not visible here.
envvars:
    "ZENODO_TOKEN",
    "CDSAPI_URL",
    "CDSAPI_KEY"
def boundary_geom(iso3):
    """Return the geometry for the boundary with code *iso3*."""
    geometry = BOUNDARY_LU.loc[iso3, "geometry"]
    return geometry
def boundary_adm0_a3(iso3):
    """Return the standard parent ADM0 A3 code for boundary *iso3*.

    Usually a one-to-one mapping with no change, but where territories
    are split into custom units (for example Kiribati, East and West of
    the antimeridian) this resolves the standard parent ADM0 A3 code.
    """
    parent_code = BOUNDARY_LU.loc[iso3, "ADM0_A3"]
    return parent_code
#
# Top-level rules
#
# Delete all generated package outputs (does not touch incoming_data).
rule clean:
    shell:
        "rm -rf data"
# Default target: a finished datapackage.json for every boundary code.
rule all:
    input:
        expand("data/{ISO3}/datapackage.json", ISO3=BOUNDARIES.CODE_A3),
# Target: every package deposited to Zenodo. The .deposited marker files
# are presumably produced by rules/zenodo.smk — not visible here.
rule all_uploaded:
    input:
        expand("zenodo/{ISO3}.deposited", ISO3=BOUNDARIES.CODE_A3),
# Target: every Zenodo deposit published. The .published marker files
# are presumably produced by rules/zenodo.smk — not visible here.
rule all_published:
    input:
        expand("zenodo/{ISO3}.published", ISO3=BOUNDARIES.CODE_A3),
#
# Data package
#
# Write datapackage.json metadata for one boundary's package. Depends on
# the checksum file, which in turn requires every packaged dataset, so
# this only runs once the package is complete.
rule datapackage:
    input:
        checksums="data/{ISO3}/md5sum.txt",
    output:
        json="data/{ISO3}/datapackage.json",
    script:
        "scripts/generate_datapackage_json.py"
# Compute MD5 checksums over the packaged raster/vector files.
rule checksums:
    # input must require all the data package files
    # - summary CSVs require multiple TIFFs in turn
    input:
        "data/{ISO3}/aqueduct_flood.csv",
        "data/{ISO3}/geoboundaries.csv",
        "data/{ISO3}/gridfinder/grid__{ISO3}.gpkg",
        "data/{ISO3}/gridfinder/targets__{ISO3}.tif",
        "data/{ISO3}/isimip_heat_drought.csv",
        "data/{ISO3}/jrc_ghsl.csv",
        "data/{ISO3}/jrc_floods.csv",
        "data/{ISO3}/openstreetmap/openstreetmap_rail__{ISO3}.gpkg",
        "data/{ISO3}/openstreetmap/openstreetmap_roads-tertiary__{ISO3}.gpkg",
        "data/{ISO3}/storm.csv",
        "data/{ISO3}/wri_powerplants/wri-powerplants__{ISO3}.gpkg",
        "data/{ISO3}/copernicus_lulc/copernicus_lulc__{ISO3}.tif",
        "data/{ISO3}/copernicus_dem/copernicus_dem__{ISO3}.tif",
    output:
        checksums="data/{ISO3}/md5sum.txt",
    # Checksum only .tif/.gpkg files, sorted by filename for stable output.
    # NOTE(review): without bash globstar, ** matches a single path segment;
    # all .tif/.gpkg inputs listed above sit exactly one directory deep, so
    # this works — confirm if deeper nesting is ever added.
    shell:
        """
        cd data/{wildcards.ISO3}
        md5sum **/*.* | grep "tif\\|gpkg" | sort -k 2 > md5sum.txt
        """
# Crop a dataset-wide GeoTIFF to a single boundary's geometry.
rule clip_tiff:
    input:
        tiff="incoming_data/{DATASET}/{SLUG}.tif",
    output:
        tiff="data/{ISO3}/{DATASET}/{SLUG}__{ISO3}.tif",
    run:
        # Cropping semantics (mask vs bbox) are defined by the
        # irv_datapkg.crop_raster helper — not visible here.
        irv_datapkg.crop_raster(input.tiff, output.tiff, boundary_geom(wildcards.ISO3))
# Clip a dataset-wide GeoPackage to a single boundary's bounding box.
rule clip_geopackage:
    input:
        gpkg="incoming_data/{DATASET}/{SLUG}.gpkg",
    output:
        gpkg="data/{ISO3}/{DATASET}/{SLUG}__{ISO3}.gpkg",
    run:
        gdf = geopandas.read_file(input.gpkg, engine="pyogrio")
        geom = boundary_geom(wildcards.ISO3)
        (xmin, ymin, xmax, ymax) = geom.bounds
        # NOTE(review): .cx selects features whose bounding box intersects
        # this envelope — features are kept whole, not geometrically cut to
        # the boundary polygon. Confirm this is the intended "clip".
        clipped = gdf.cx[xmin:xmax, ymin:ymax]
        clipped.to_file(
            output.gpkg, driver="GPKG", layer=wildcards.SLUG, engine="pyogrio"
        )
# Per-dataset download/processing rules, plus Zenodo deposit rules, live in
# separate rule files — one per data source.
include: "rules/aqueduct_flood.smk"
include: "rules/copernicus_dem.smk"
include: "rules/copernicus_lulc.smk"
include: "rules/geoboundaries.smk"
include: "rules/gridfinder.smk"
include: "rules/isimip_heat_drought.smk"
include: "rules/jrc_floods.smk"
include: "rules/jrc_ghsl.smk"
include: "rules/openstreetmap.smk"
include: "rules/storm.smk"
include: "rules/wri_powerplants.smk"
include: "rules/zenodo.smk"