@@ -1,13 +1,13 @@
import json
+ import re

import pyarrow.types as pat

from .jsonschema import create_jsonschema
from .types import PA_TYPE_CHECK
from .util import (
    create_validator,
-     get_collection,
-     load_collection_schema,
+     get_core_version,
    load_datatypes,
    load_fiboa_schema,
    load_file,
@@ -20,6 +20,7 @@ from .util import (
)
from .util import log as log_
from .validate_data import validate_column
+ from .version import fiboa_version


def log(text: str, status="info", bullet=True):
@@ -35,91 +36,94 @@ def validate(file, config):
        return validate_parquet(file, config)


- def validate_collection(collection, config):
+ def validate_schemas(schema_uris, config):
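+     # Returns a tuple of (valid, detected fiboa version, core schema URI, dict of loaded schemas keyed by URI)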
    valid = True

-     collection_version = collection.get("fiboa_version")
-     config_version = config.get("fiboa_version")
+     if not isinstance(schema_uris, list):
+         log("A list of schemas must be provided", "error")
+         valid = False

-     # Check fiboa version
-     if not isinstance(collection_version, str):
-         log("No fiboa_version string found in collection metadata", "error")
+     if len(schema_uris) == 0:
+         log("No schemas provided", "error")
        valid = False

-     log("fiboa version: " + config_version)
+     # Detect and check fiboa version
+     version = None
+     core_schema = None
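+     # The core schema is the URI for which get_core_version() yields a fiboa version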
+     for schema_uri in schema_uris:
+         version = get_core_version(schema_uri)
+         if version is not None:
+             core_schema = schema_uri
+             break
+
+     if version is None:
+         log("fiboa core schema not found in schemas, can't detect fiboa version", "error")
+         valid = False

-     if isinstance(collection_version, str) and collection_version != config_version:
+     if version != fiboa_version:
        log(
-             f"fiboa versions differs: Collection is {collection_version} and requested specification version is {config_version}",
+             f"fiboa versions differ: Schema reports {version} and supported version is {fiboa_version}",
            "warning",
        )

-     # Check STAC Collection
-     if not validate_colletion_schema(collection):
-         valid = False
+     # Check schemas (core and extensions)
+     schemas = {}
+     schema_map = config.get("schemas", {})
+     for uri in schema_uris:
+         try:
+             if uri in schema_map:
+                 actual_location = schema_map[uri]
+                 log(f"Redirecting {uri} to {actual_location}", "info")
+             else:
+                 actual_location = uri

-     # Check extensions
-     extensions = {}
-     if "fiboa_extensions" in collection:
-         ext_list = collection.get("fiboa_extensions")
-         if not isinstance(ext_list, list):
-             log("fiboa_extensions must be a list", "error")
+             schemas[uri] = load_file(actual_location)
+         except Exception as e:
+             log(f"Schema {uri} can't be loaded: {e}", "error")
            valid = False
-         else:
-             ext_map = config.get("extension_schemas", [])
-             for ext in ext_list:
-                 try:
-                     if ext in ext_map:
-                         path = ext_map[ext]
-                         log(f"Redirecting {ext} to {path}", "info")
-                     else:
-                         path = ext
-                     extensions[ext] = load_file(path)
-                 except Exception as e:
-                     log(f"Extension {ext} can't be loaded: {e}", "error")
-                     valid = False

-     log_extensions(collection, lambda x: log(x, "info", False))
+     log_extensions(schema_uris, lambda x: log(x, "info", False))

-     return valid, extensions
+     return valid, version, core_schema, schemas


def validate_geojson(file, config):
-     valid = True
-     extensions = {}
-
    try:
        data = load_file(file)
    except Exception as error:
        log(error, "error")
        return False

-     collection = get_collection(data, config.get("collection"), file)
-     if collection is None:
-         log("No collection specified", "error")
-         valid = False
+     if not isinstance(data, dict):
+         log("Must be a JSON object", "error")
+         return False

-     if config.get("fiboa_version") is None and collection.get("fiboa_version") is not None:
-         config["fiboa_version"] = collection.get("fiboa_version")
+     schemas_uris = {}
+     collection = {}
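+     # Schema URIs come from the top-level "fiboa" object of a FeatureCollection, or from "properties" of a single Feature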
+     feature_type = data.get("type")
+     if feature_type == "FeatureCollection":
+         collection = data.get("fiboa", {})
+         schemas_uris = collection.get("schemas", {})
+         features = data["features"]
+     elif feature_type == "Feature":
+         schemas_uris = data.get("properties", {}).get("schemas", {})
+         features = [data]
+     else:
+         log("Must be a GeoJSON Feature or FeatureCollection", "error")
+         return False

-     if collection is not None:
-         collection_valid, extensions = validate_collection(collection, config)
-         if not collection_valid:
-             valid = False
+     valid, version, core_schema_uri, schemas = validate_schemas(schemas_uris, config)

-     core_schema = load_fiboa_schema(config)
-     datatypes = load_datatypes(config["fiboa_version"])
+     core_schema = schemas[core_schema_uri]
+     datatypes = load_datatypes(version)
    schema = create_jsonschema(core_schema, datatypes)

    # Load extensions
    ext_errors = []
-     for ext in extensions:
+     extensions = {}
+     for ext in schemas:
        try:
-             uri = ext
-             if ext in config["extension_schemas"]:
-                 uri = config["extension_schemas"][ext]
-             ext_schema = load_file(uri)
-             json_schema = create_jsonschema(ext_schema, datatypes)
+             json_schema = create_jsonschema(schemas[ext], datatypes)
            extensions[ext] = lambda obj: validate_json_schema(obj, json_schema)
        except Exception as error:
            extensions[ext] = None
@@ -129,21 +133,6 @@ def validate_geojson(file, config):
        log(error, "error")

    # Validate
-     if not isinstance(data, dict):
-         log("Must be a JSON object", "error")
-         return False
-
-     if data["type"] == "Feature":
-         features = [data]
-     elif data["type"] == "FeatureCollection":
-         features = data["features"]
-     elif data["type"] == "Collection":
-         # Skipping specific checks related to STAC Collection
-         return None
-     else:
-         log("Must be a GeoJSON Feature or FeatureCollection", "error")
-         return False
-
    if len(features) == 0:
        log("Must contain at least one Feature", "error")
        return False
@@ -176,41 +165,28 @@ def validate_geojson(file, config):

def validate_parquet(file, config):
    parquet_schema = load_parquet_schema(file)
-     valid = True
-     extensions = {}

    # Validate geo metadata in Parquet header
    if b"geo" not in parquet_schema.metadata:
        log("Parquet file schema does not have 'geo' key", "error")
-         valid = False
+         return False
    else:
        geo = parse_metadata(parquet_schema, b"geo")
        if not validate_geoparquet_schema(geo):
-             valid = False
+             return False

    # Validate fiboa metadata in Parquet header
-     collection = {}
    if b"fiboa" not in parquet_schema.metadata:
-         log("Parquet file schema does not have a 'fiboa' key", "warning")
-         if not config.get("collection"):
-             log("No collection specified", "error")
-             valid = False
-         else:
-             collection = load_file(config.get("collection"))
-     else:
-         collection = parse_metadata(parquet_schema, b"fiboa")
+         log("Parquet file schema does not have a 'fiboa' key", "error")
+         return False

-     if config.get("fiboa_version") is None and collection.get("fiboa_version") is not None:
-         config["fiboa_version"] = collection.get("fiboa_version")
+     collection = parse_metadata(parquet_schema, b"fiboa")
+     schemas_uris = collection.get("schemas", {})
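+     # The fiboa metadata is expected to list the schema URIs (core schema plus extensions) used by the dataset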

-     # Validate Collection
-     if len(collection) > 0:
-         valid_collection, extensions = validate_collection(collection, config)
-         if not valid_collection:
-             valid = False
+     valid, version, core_schema_uri, schemas = validate_schemas(schemas_uris, config)

-     # load the actual fiboa schema
-     fiboa_schema = load_fiboa_schema(config)
+     fiboa_schema = schemas[core_schema_uri]
+     extensions = {}

    # Load data if needed
    gdf = None
@@ -223,7 +199,9 @@ def validate_parquet(file, config):

    # Compile all properties from the schemas
+     loaded_schemas = schemas
    schemas = fiboa_schema
-     for ext in extensions.values():
+     for uri, ext in loaded_schemas.items():
+         if uri == core_schema_uri:
+             continue
        schemas = merge_schemas(schemas, ext)

    # Add custom schemas
@@ -326,22 +304,6 @@ def validate_geometry_column(key, prop_schema, geo, valid=True):
    return valid


- # todo: use stac_validator instead of our own validation routine
- def validate_colletion_schema(obj):
-     if "stac_version" in obj:
-         try:
-             schema = load_collection_schema(obj)
-             errors = validate_json_schema(obj, schema)
-             for error in errors:
-                 log(f"Collection: {error.path}: {error.message}", "error")
-
-             return len(errors) == 0
-         except Exception as e:
-             log(f"Failed to validate STAC Collection due to an internal error: {e}", "warning")
-
-     return True
-
-
# todo: use a geoparquet validator instead of our own validation routine
def validate_geoparquet_schema(obj):
    if "version" in obj: