Skip to content

Commit 61b4b53

Browse files
committed
Update json schema creation to v0.3.0
1 parent ea78f72 commit 61b4b53

14 files changed

+275
-194
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ you can for example run:
8989
- GeoJSON Features (with indentation and max. 100 features):
9090
`fiboa create-geojson example.parquet -o dest-folder -n 100 -i 2 -f`
9191

92-
Check `fiboa create-geoparquet --help` for more details.
92+
Check `fiboa create-geojson --help` for more details.
9393

9494
### Inspect fiboa GeoParquet file
9595

fiboa_cli/__init__.py

Lines changed: 34 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,12 @@
66
import click
77
import pandas as pd
88

9-
from .const import COMPRESSION_METHODS, CORE_COLUMNS
9+
from .const import (
10+
COMPRESSION_METHODS,
11+
CORE_COLUMNS,
12+
FIBOA_GEOJSON_DATATYPES_SCHEMA,
13+
FIBOA_SPECIFICAION_SCHEMA,
14+
)
1015
from .convert import convert as convert_
1116
from .convert import list_all_converter_ids, list_all_converters
1217
from .create_geojson import create_geojson as create_geojson_
@@ -95,13 +100,6 @@ def describe(file, json, num=10, column=[]):
95100
value, ["parquet", "geoparquet", "json", "geojson"]
96101
),
97102
)
98-
@click.option(
99-
"--schemas",
100-
"-s",
101-
multiple=True,
102-
callback=lambda ctx, param, value: check_ext_schema_for_cli(value, allow_none=False),
103-
help="Maps a remote fiboa schema URL to a local file. First the URL, then the local file path. Separated with a comma character. Example: https://example.com/schema.yaml,/path/to/schema.yaml",
104-
)
105103
@click.option(
106104
"--data",
107105
"-d",
@@ -110,6 +108,13 @@ def describe(file, json, num=10, column=[]):
110108
help="EXPERIMENTAL: Validate the data in the GeoParquet file. Enabling this might be slow or exceed memory. Default is False.",
111109
default=False,
112110
)
111+
@click.option(
112+
"--schemas",
113+
"-s",
114+
multiple=True,
115+
callback=lambda ctx, param, value: check_ext_schema_for_cli(value, allow_none=False),
116+
help="Maps a remote fiboa schema URL to a local file. First the URL, then the local file path. Separated with a comma character. Example: https://example.com/schema.yaml,/path/to/schema.yaml",
117+
)
113118
@click.option(
114119
"--timer",
115120
is_flag=True,
@@ -118,7 +123,7 @@ def describe(file, json, num=10, column=[]):
118123
default=False,
119124
hidden=True,
120125
)
121-
def validate(files, schemas, data, timer):
126+
def validate(files, data, schemas, timer):
122127
"""
123128
Validates a fiboa GeoParquet or GeoJSON file.
124129
"""
@@ -204,51 +209,21 @@ def validate_schema(files, metaschema):
204209
"--out", "-o", type=click.Path(exists=False), help="Path to write the file to.", required=True
205210
)
206211
@click.option(
207-
"--collection",
208-
"-c",
209-
callback=valid_file_for_cli,
210-
help="Points to the Collection that defines the fiboa version and extensions. Only applies if not provided in the GeoJSON file (embedded or as link).",
211-
default=None,
212-
)
213-
@click.option(
214-
"--schema",
212+
"--schemas",
215213
"-s",
216-
type=click.Path(exists=True),
217-
help="fiboa Schema to work against. If not provided, uses the fiboa version from the collection to load the schema for the released version.",
218-
)
219-
@click.option(
220-
"--ext-schema",
221-
"-e",
222214
multiple=True,
223-
callback=lambda ctx, param, value: check_ext_schema_for_cli(value, allow_none=True),
224-
help="Applicable fiboa extensions as URLs. Can map a remote fiboa extension schema url to a local file by adding a local file path, separated by a comma. Example: https://example.com/schema.json,/path/to/schema.json",
225-
)
226-
@click.option(
227-
"--fiboa-version",
228-
"-f",
229-
type=click.STRING,
230-
help="The applicable fiboa version if no collection is provided.",
231-
show_default=True,
232-
default=fiboa_version_,
215+
callback=lambda ctx, param, value: check_ext_schema_for_cli(value, allow_none=False),
216+
help="Maps a remote fiboa schema URL to a local file. First the URL, then the local file path. Separated with a comma character. Example: https://example.com/schema.yaml,/path/to/schema.yaml",
233217
)
234-
def create_geoparquet(files, out, collection, schema, ext_schema, fiboa_version):
218+
def create_geoparquet(files, out, schemas):
235219
"""
236220
Create a fiboa GeoParquet file from GeoJSON file(s).
237-
238-
The collection metadata has the following priority order:
239-
1. Read from the last GeoJSON file/feature (embedded 'fiboa' property)
240-
1. Read from the last GeoJSON file/feature (link with relation type 'collection')
241-
2. Read from the collection parameter
242-
3. Use fiboa_version and extension_schemas parameters
243221
"""
244222
log(f"fiboa CLI {__version__} - Create GeoParquet\n", "success")
245223
config = {
246224
"files": files,
247225
"out": out,
248-
"schema": schema,
249-
"collection": collection,
250-
"extension_schemas": ext_schema,
251-
"fiboa_version": fiboa_version,
226+
"schemas": schemas,
252227
}
253228
try:
254229
create_geoparquet_(config)
@@ -316,7 +291,18 @@ def create_geojson(file, out, features=False, num=None, indent=None):
316291
"-s",
317292
type=click.STRING,
318293
callback=valid_file_for_cli,
319-
help="fiboa Schema to create the JSON Schema for. Can be a local file or a URL. If not provided, uses the fiboa version to load the schema for the released version.",
294+
help=f"fiboa schema to create the JSON Schema for. Can be a local file or a URL. If not provided, loads the schema for fiboa version {fiboa_version_}.",
295+
show_default=True,
296+
default=FIBOA_SPECIFICAION_SCHEMA.format(version=fiboa_version_),
297+
)
298+
@click.option(
299+
"--datatypes",
300+
"-d",
301+
type=click.STRING,
302+
callback=valid_file_for_cli,
303+
help=f"Schema for the fiboa GeoJSON datatypes. Can be a local file or a URL. If not provided, loads the GeoJSON datatypes for fiboa version {fiboa_version_}.",
304+
show_default=True,
305+
default=FIBOA_GEOJSON_DATATYPES_SCHEMA.format(version=fiboa_version_),
320306
)
321307
@click.option(
322308
"--out",
@@ -325,33 +311,21 @@ def create_geojson(file, out, features=False, num=None, indent=None):
325311
help="Path to write the file to. If not provided, prints the file to the STDOUT.",
326312
default=None,
327313
)
328-
@click.option(
329-
"--fiboa-version",
330-
"-f",
331-
type=click.STRING,
332-
help="The fiboa version to validate against.",
333-
show_default=True,
334-
default=fiboa_version_,
335-
)
336314
@click.option(
337315
"--id",
338316
"-i",
339317
"id_",
340318
type=click.STRING,
341319
help="The JSON Schema $id to use for the schema. If not provided, the $id will be omitted.",
320+
default=None,
342321
)
343-
def jsonschema(schema, out, fiboa_version, id_):
322+
def jsonschema(schema, datatypes, out, id_):
344323
"""
345324
Create a JSON Schema for a fiboa Schema
346325
"""
347326
log(f"fiboa CLI {__version__} - Create JSON Schema\n", "success")
348-
config = {
349-
"schema": schema,
350-
"fiboa_version": fiboa_version,
351-
"id": id_,
352-
}
353327
try:
354-
schema = jsonschema_(config)
328+
schema = jsonschema_(schema, datatypes, id_)
355329
if out:
356330
with open(out, "w", encoding="utf-8") as f:
357331
json.dump(schema, f, indent=2)

fiboa_cli/const.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,12 @@
22

33
SUPPORTED_PROTOCOLS = ["http", "https", "s3", "gs"]
44

5+
FIBOA_SPECIFICAION_PATTERN = r"https://fiboa.github.io/specification/v([^/]+)/schema.yaml"
6+
FIBOA_SPECIFICAION_SCHEMA = "https://fiboa.github.io/specification/v{version}/schema.yaml"
7+
FIBOA_GEOJSON_DATATYPES_SCHEMA = (
8+
"https://fiboa.github.io/specification/v{version}/geojson/datatypes.json"
9+
)
10+
511
GEOPARQUET_SCHEMA = "https://geoparquet.org/releases/v{version}/schema.json"
612
STAC_TABLE_EXTENSION = "https://stac-extensions.github.io/table/v1.2.0/schema.json"
713

fiboa_cli/create_geojson.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,24 @@ def fix_geojson(obj):
7575
obj["bbox"] = [bbox["xmin"], bbox["ymin"], bbox["xmax"], bbox["ymax"]]
7676
del obj["properties"]["bbox"]
7777

78+
# Remove null values
79+
obj["properties"] = fix_omit_nulled_properties(obj["properties"])
80+
81+
return obj
82+
83+
84+
def fix_omit_nulled_properties(obj):
    """
    Recursively remove all entries whose value is None from a dict.

    The dict is modified in place and the same object is returned.
    Nested dicts are cleaned recursively, as are dicts that are direct
    items of a list value. Other list items (including None items and
    nested lists) are left untouched.

    :param obj: The dict to clean up.
    :return: The same dict, without None-valued entries.
    """
    # Iterate over a snapshot of the keys: deleting entries while iterating
    # the live key view raises
    # "RuntimeError: dictionary changed size during iteration".
    for key in list(obj):
        value = obj[key]
        if value is None:
            del obj[key]
        elif isinstance(value, dict):
            obj[key] = fix_omit_nulled_properties(value)
        elif isinstance(value, list):
            for i, item in enumerate(value):
                # Only dict items are cleaned; everything else is kept as-is.
                if isinstance(item, dict):
                    value[i] = fix_omit_nulled_properties(item)

    return obj
7997

8098

fiboa_cli/create_geoparquet.py

Lines changed: 3 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
1-
import os
2-
31
from .parquet import create_parquet
4-
from .util import get_collection, load_file, log
2+
from .util import collection_from_featurecollection, load_file, log
53

64

75
def create_geoparquet(config):
@@ -11,40 +9,21 @@ def create_geoparquet(config):
119
features = []
1210
geojson = {}
1311
file = None
12+
collection = {}
1413
files = config.get("files")
1514
for file in files:
1615
geojson = load_file(file)
1716
if geojson["type"] == "Feature":
1817
features.append(geojson)
1918
elif geojson["type"] == "FeatureCollection":
2019
features += geojson["features"]
20+
collection = collection_from_featurecollection(geojson)
2121
else:
2222
log(f"{file}: Skipped - Unsupported GeoJSON type, must be Feature or FeatureCollection")
2323

2424
if len(features) == 0:
2525
raise Exception("No valid features provided as input files")
2626

27-
# Add a STAC collection to the fiboa property to the Parquet metadata
28-
# Note: for features this loads the collection of the last feature only
29-
# if not provided specifically via collection parameter
30-
collection = get_collection(geojson, config.get("collection"), file)
31-
32-
if collection is None:
33-
# No collection found, create a default collection based on parameters
34-
version = config.get("fiboa_version")
35-
collection = {
36-
"fiboa_version": version,
37-
"fiboa_extensions": list(config.get("extension_schemas", {}).keys()),
38-
}
39-
40-
# add a default id based on the output filename
41-
if "id" not in collection or not collection["id"]:
42-
collection["id"] = os.path.basename(output_file)
43-
44-
# Make the fiboa_version consistent with the collection
45-
if "fiboa_version" in collection:
46-
config["fiboa_version"] = collection["fiboa_version"]
47-
4827
# Get a list of the properties/columns (without duplicates)
4928
columns = set(["id", "geometry"])
5029
for feature in features:

fiboa_cli/describe.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def describe(file, display_json=False, num=10, columns=None):
3636
if "fiboa_extensions" in collection and isinstance(collection["fiboa_extensions"], list):
3737
log_extensions(collection["fiboa_extensions"], log)
3838

39-
custom_schemas = collection.get("fiboa_custom_schemas", {})
39+
custom_schemas = collection.get("custom_schemas", {})
4040
if not is_schema_empty(custom_schemas):
4141
log("Custom schemas: " + ", ".join(custom_schemas["properties"].keys()))
4242

fiboa_cli/improve.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,10 +90,10 @@ def improve(
9090
gdf["area"] = gdf_m["area"].fillna(gdf_m.geometry.area * 0.0001)
9191
gdf["perimeter"] = gdf_m["perimeter"].fillna(gdf_m.geometry.length)
9292

93-
custom_schemas = collection.get("fiboa_custom_schemas", {})
93+
custom_schemas = collection.get("custom_schemas", {})
9494
custom_schemas = pick_schemas(custom_schemas, columns, rename_columns)
9595
if not is_schema_empty(custom_schemas):
96-
collection["fiboa_custom_schemas"] = custom_schemas
96+
collection["custom_schemas"] = custom_schemas
9797

9898
# Write the merged dataset to the output file
9999
create_parquet(

0 commit comments

Comments
 (0)