Skip to content

Commit 57070cd

Browse files
committed
Upgrade validation to v0.3.0
1 parent 9276422 commit 57070cd

File tree

6 files changed

+89
-154
lines changed

6 files changed

+89
-154
lines changed

fiboa_cli/__init__.py

Lines changed: 4 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -96,32 +96,11 @@ def describe(file, json, num=10, column=[]):
9696
),
9797
)
9898
@click.option(
99-
"--schema",
99+
"--schemas",
100100
"-s",
101-
type=click.STRING,
102-
callback=valid_file_for_cli,
103-
help="fiboa Schema to validate against. Can be a local file or a URL. If not provided, uses the fiboa version to load the schema for the released version.",
104-
)
105-
@click.option(
106-
"--ext-schema",
107-
"-e",
108101
multiple=True,
109102
callback=lambda ctx, param, value: check_ext_schema_for_cli(value, allow_none=False),
110-
help="Maps a remote fiboa extension schema url to a local file. First the URL, then the local file path. Separated with a comma character. Example: https://example.com/schema.yaml,/path/to/schema.yaml",
111-
)
112-
@click.option(
113-
"--fiboa-version",
114-
"-f",
115-
type=click.STRING,
116-
help="The fiboa version to validate against. Default is the version given in the collection.",
117-
default=None,
118-
)
119-
@click.option(
120-
"--collection",
121-
"-c",
122-
type=click.Path(exists=True),
123-
help="Points to the Collection that defines the fiboa version and extensions.",
124-
default=None,
103+
help="Maps a remote fiboa schema URL to a local file. First the URL, then the local file path. Separated with a comma character. Example: https://example.com/schema.yaml,/path/to/schema.yaml",
125104
)
126105
@click.option(
127106
"--data",
@@ -139,17 +118,14 @@ def describe(file, json, num=10, column=[]):
139118
default=False,
140119
hidden=True,
141120
)
142-
def validate(files, schema, ext_schema, fiboa_version, collection, data, timer):
121+
def validate(files, schemas, data, timer):
143122
"""
144123
Validates a fiboa GeoParquet or GeoJSON file.
145124
"""
146125
start = time.perf_counter()
147126
log(f"fiboa CLI {__version__} - Validator\n", "success")
148127
config = {
149-
"schema": schema,
150-
"extension_schemas": ext_schema,
151-
"fiboa_version": fiboa_version,
152-
"collection": collection,
128+
"schemas": schemas,
153129
"data": data,
154130
}
155131

fiboa_cli/const.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,6 @@
22

33
SUPPORTED_PROTOCOLS = ["http", "https", "s3", "gs"]
44

5-
STAC_COLLECTION_SCHEMA = (
6-
"http://schemas.stacspec.org/v{version}/collection-spec/json-schema/collection.json"
7-
)
85
GEOPARQUET_SCHEMA = "https://geoparquet.org/releases/v{version}/schema.json"
96
STAC_TABLE_EXTENSION = "https://stac-extensions.github.io/table/v1.2.0/schema.json"
107

fiboa_cli/describe.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ def describe(file, display_json=False, num=10, columns=None):
3434
if collection:
3535
log(f"fiboa version: {collection['fiboa_version']}")
3636
if "fiboa_extensions" in collection and isinstance(collection["fiboa_extensions"], list):
37-
log_extensions(collection, log)
37+
log_extensions(collection["fiboa_extensions"], log)
3838

3939
custom_schemas = collection.get("fiboa_custom_schemas", {})
4040
if not is_schema_empty(custom_schemas):

fiboa_cli/util.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -324,27 +324,27 @@ def to_iso8601(dt):
324324
return iso + "Z"
325325

326326

327-
def load_collection_schema(obj):
328-
if "stac_version" in obj:
329-
return load_file(STAC_COLLECTION_SCHEMA.format(version=obj["stac_version"]))
330-
else:
331-
return None
332-
333-
334327
def load_geoparquet_schema(obj):
    """
    Load the GeoParquet JSON Schema matching the version declared in ``obj``.

    ``obj`` is the parsed GeoParquet metadata. Returns the result of
    ``load_file`` for the versioned schema URL, or ``None`` when ``obj``
    has no "version" key.
    """
    if "version" not in obj:
        return None

    schema_url = GEOPARQUET_SCHEMA.format(version=obj["version"])
    return load_file(schema_url)
339332

340333

341-
def log_extensions(collection, logger):
342-
extensions = collection.get("fiboa_extensions", [])
343-
if len(extensions) == 0:
334+
def get_core_version(uri):
    """
    Extract the fiboa specification version from a core schema URI.

    A URI is treated as the core schema only if it has exactly the form
    ``https://fiboa.github.io/specification/v<version>/schema.yaml``.

    Returns the ``<version>`` part as a string, or ``None`` if ``uri`` is
    not a string or is not a core schema URI (e.g. an extension schema).
    """
    # URIs come from file metadata, so they may be of any JSON type.
    if not isinstance(uri, str):
        return None

    # Dots are escaped and the whole string must match, so that look-alike
    # hosts or URIs with trailing junk are not mistaken for the core schema.
    match = re.fullmatch(
        r"https://fiboa\.github\.io/specification/v([^/]+)/schema\.yaml", uri
    )
    return match.group(1) if match else None
337+
338+
339+
def log_extensions(schemas, logger):
    """
    Log the extension schemas contained in a list of schema URIs.

    Every URI that ``get_core_version`` does not recognise as the fiboa core
    schema is reported as an extension, in sorted order. The caller's list
    is not modified.

    ``schemas`` is an iterable of schema URIs (may be empty or ``None``).
    ``logger`` is a callable that takes the message string to emit.
    """
    # sorted() returns a new list; list.sort() sorts in place and returns
    # None, which must not be assigned back to the variable.
    extensions = sorted(uri for uri in (schemas or []) if get_core_version(uri) is None)

    if not extensions:
        logger("fiboa extensions: none")
        return

    logger("fiboa extensions:")
    for extension in extensions:
        logger(f"  - {extension}")
349349

350350

fiboa_cli/validate.py

Lines changed: 72 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
import json
2+
import re
23

34
import pyarrow.types as pat
45

56
from .jsonschema import create_jsonschema
67
from .types import PA_TYPE_CHECK
78
from .util import (
89
create_validator,
9-
get_collection,
10-
load_collection_schema,
10+
get_core_version,
1111
load_datatypes,
1212
load_fiboa_schema,
1313
load_file,
@@ -20,6 +20,7 @@
2020
)
2121
from .util import log as log_
2222
from .validate_data import validate_column
23+
from .version import fiboa_version
2324

2425

2526
def log(text: str, status="info", bullet=True):
@@ -35,91 +36,94 @@ def validate(file, config):
3536
return validate_parquet(file, config)
3637

3738

38-
def validate_collection(collection, config):
39+
def validate_schemas(schema_uris, config):
    """
    Check the schema URIs declared by a file and load all referenced schemas.

    The fiboa core schema is detected among ``schema_uris`` with
    ``get_core_version``; its version is compared against the fiboa version
    supported by this CLI (a mismatch is only a warning). Each URI is then
    loaded with ``load_file``; URIs present in ``config["schemas"]`` are
    redirected to the mapped location first.

    Returns a tuple ``(valid, version, core_schema, schemas)``:
    ``valid`` is False if any error was logged, ``version`` is the detected
    core version or None, ``core_schema`` is the URI of the core schema or
    None, and ``schemas`` maps each successfully loaded URI to its content.
    """
    if not isinstance(schema_uris, list):
        log("A list of schemas must be provided", "error")
        # Nothing below can work on a non-list, so stop here.
        return False, None, None, {}

    valid = True

    if len(schema_uris) == 0:
        log("No schemas provided", "error")
        valid = False

    # Detect and check fiboa version
    version = None
    core_schema = None
    for schema_uri in schema_uris:
        version = get_core_version(schema_uri)
        if version is not None:
            core_schema = schema_uri
            break

    if version is None:
        log("fiboa core schema not found in schemas, can't detect fiboa version", "error")
        valid = False
    elif version != fiboa_version:
        # Only compare versions when one was actually detected.
        log(
            f"fiboa versions differs: Schema reports {version} and supported version is {fiboa_version}",
            "warning",
        )

    # Check schemas (core and extensions)
    schemas = {}
    schema_map = config.get("schemas") or {}
    for uri in schema_uris:
        try:
            if uri in schema_map:
                actual_location = schema_map[uri]
                log(f"Redirecting {uri} to {actual_location}", "info")
            else:
                actual_location = uri

            # Key by the URI of this iteration, not by the variable left
            # over from the version-detection loop above.
            schemas[uri] = load_file(actual_location)
        except Exception as e:
            log(f"Schema {uri} can't be loaded: {e}", "error")
            valid = False

    log_extensions(schema_uris, lambda x: log(x, "info", False))

    return valid, version, core_schema, schemas
8588

8689

8790
def validate_geojson(file, config):
88-
valid = True
89-
extensions = {}
90-
9191
try:
9292
data = load_file(file)
9393
except Exception as error:
9494
log(error, "error")
9595
return False
9696

97-
collection = get_collection(data, config.get("collection"), file)
98-
if collection is None:
99-
log("No collection specified", "error")
100-
valid = False
97+
if not isinstance(data, dict):
98+
log("Must be a JSON object", "error")
99+
return False
101100

102-
if config.get("fiboa_version") is None and collection.get("fiboa_version") is not None:
103-
config["fiboa_version"] = collection.get("fiboa_version")
101+
schemas_uris = {}
102+
collection = {}
103+
feature_type = data.get("type")
104+
if feature_type == "FeatureCollection":
105+
collection = data.get("fiboa", {})
106+
schemas_uris = collection.get("schemas", {})
107+
features = data["features"]
108+
elif feature_type == "Feature":
109+
schemas_uris = data.get("properties", {}).get("schemas", {})
110+
features = [data]
111+
else:
112+
log("Must be a GeoJSON Feature or FeatureCollection", "error")
113+
return False
104114

105-
if collection is not None:
106-
collection_valid, extensions = validate_collection(collection, config)
107-
if not collection_valid:
108-
valid = False
115+
valid, version, core_schema_uri, schemas = validate_schemas(schemas_uris, config)
109116

110-
core_schema = load_fiboa_schema(config)
111-
datatypes = load_datatypes(config["fiboa_version"])
117+
core_schema = schemas[core_schema_uri]
118+
datatypes = load_datatypes(version)
112119
schema = create_jsonschema(core_schema, datatypes)
113120

114121
# Load extensions
115122
ext_errors = []
116-
for ext in extensions:
123+
extensions = {}
124+
for ext in schemas:
117125
try:
118-
uri = ext
119-
if ext in config["extension_schemas"]:
120-
uri = config["extension_schemas"][ext]
121-
ext_schema = load_file(uri)
122-
json_schema = create_jsonschema(ext_schema, datatypes)
126+
json_schema = create_jsonschema(schemas[ext], datatypes)
123127
extensions[ext] = lambda obj: validate_json_schema(obj, json_schema)
124128
except Exception as error:
125129
extensions[ext] = None
@@ -129,21 +133,6 @@ def validate_geojson(file, config):
129133
log(error, "error")
130134

131135
# Validate
132-
if not isinstance(data, dict):
133-
log("Must be a JSON object", "error")
134-
return False
135-
136-
if data["type"] == "Feature":
137-
features = [data]
138-
elif data["type"] == "FeatureCollection":
139-
features = data["features"]
140-
elif data["type"] == "Collection":
141-
# Skipping specific checks related to STAC Collection
142-
return None
143-
else:
144-
log("Must be a GeoJSON Feature or FeatureCollection", "error")
145-
return False
146-
147136
if len(features) == 0:
148137
log("Must contain at least one Feature", "error")
149138
return False
@@ -176,41 +165,28 @@ def validate_geojson(file, config):
176165

177166
def validate_parquet(file, config):
178167
parquet_schema = load_parquet_schema(file)
179-
valid = True
180-
extensions = {}
181168

182169
# Validate geo metadata in Parquet header
183170
if b"geo" not in parquet_schema.metadata:
184171
log("Parquet file schema does not have 'geo' key", "error")
185-
valid = False
172+
return False
186173
else:
187174
geo = parse_metadata(parquet_schema, b"geo")
188175
if not validate_geoparquet_schema(geo):
189-
valid = False
176+
return False
190177

191178
# Validate fiboa metadata in Parquet header
192-
collection = {}
193179
if b"fiboa" not in parquet_schema.metadata:
194-
log("Parquet file schema does not have a 'fiboa' key", "warning")
195-
if not config.get("collection"):
196-
log("No collection specified", "error")
197-
valid = False
198-
else:
199-
collection = load_file(config.get("collection"))
200-
else:
201-
collection = parse_metadata(parquet_schema, b"fiboa")
180+
log("Parquet file schema does not have a 'fiboa' key", "error")
181+
return False
202182

203-
if config.get("fiboa_version") is None and collection.get("fiboa_version") is not None:
204-
config["fiboa_version"] = collection.get("fiboa_version")
183+
collection = parse_metadata(parquet_schema, b"fiboa")
184+
schemas_uris = collection.get("schemas", {})
205185

206-
# Validate Collection
207-
if len(collection) > 0:
208-
valid_collection, extensions = validate_collection(collection, config)
209-
if not valid_collection:
210-
valid = False
186+
valid, version, core_schema_uri, schemas = validate_schemas(schemas_uris, config)
211187

212-
# load the actual fiboa schema
213-
fiboa_schema = load_fiboa_schema(config)
188+
fiboa_schema = schemas[core_schema_uri]
189+
extensions = {}
214190

215191
# Load data if needed
216192
gdf = None
@@ -223,7 +199,9 @@ def validate_parquet(file, config):
223199

224200
# Compile all properties from the schemas
225201
schemas = fiboa_schema
226-
for ext in extensions.values():
202+
for ext in schemas.values():
203+
if core_schema_uri == ext:
204+
continue
227205
schemas = merge_schemas(schemas, ext)
228206

229207
# Add custom schemas
@@ -326,22 +304,6 @@ def validate_geometry_column(key, prop_schema, geo, valid=True):
326304
return valid
327305

328306

329-
# todo: use stac_validator instead of our own validation routine
330-
def validate_colletion_schema(obj):
331-
if "stac_version" in obj:
332-
try:
333-
schema = load_collection_schema(obj)
334-
errors = validate_json_schema(obj, schema)
335-
for error in errors:
336-
log(f"Collection: {error.path}: {error.message}", "error")
337-
338-
return len(errors) == 0
339-
except Exception as e:
340-
log(f"Failed to validate STAC Collection due to an internal error: {e}", "warning")
341-
342-
return True
343-
344-
345307
# todo: use a geoparquet validator instead of our own validation routine
346308
def validate_geoparquet_schema(obj):
347309
if "version" in obj:

fiboa_cli/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
__version__ = "0.10.0"
2-
fiboa_version = "0.2.0"
2+
fiboa_version = "0.3.0"

0 commit comments

Comments
 (0)