Skip to content

Commit 897599d

Browse files
ranjan-sthatnagorra
authored and committed
Add the IDU source data validator
- Add the method to validate the source data items
- Update the idu validator
- Validate the data before assignment
- Gidd validator added
- Glide validator added
- Em dat data validation
- Gdacs data validators
- GFD data validators
- GIDD data validators
- IFRC data validators
- GLIDE data validators
- Ibtracs validator added
- Fix ibtracs collection id and country code.
- GLIDE data validators
- Add partial success for ibtracs.
1 parent 5af5c89 commit 897599d

20 files changed

+1318
-376
lines changed

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ dependencies = [
3333
"pytz>=2021.1",
3434
"pandas>=2.2.0",
3535
"lxml>=5.3.0",
36-
"pydantic",
36+
"pydantic>=2.10.6",
3737
]
3838
dynamic = ["version"]
3939

pystac_monty/sources/emdat.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from pystac_monty.hazard_profiles import MontyHazardProfiles
2020
from pystac_monty.sources.common import MontyDataSource, MontyDataTransformer
2121
from pystac_monty.utils import rename_columns
22+
from pystac_monty.validators.em_dat import EmdatDataValidator
2223

2324
STAC_EVENT_ID_PREFIX = "emdat-event-"
2425
STAC_HAZARD_ID_PREFIX = "emdat-hazard-"
@@ -40,12 +41,24 @@ def __init__(self, source_url: str, data: Union[str, pd.DataFrame]):
4041
self.df = data
4142
elif isinstance(data, dict):
4243
# If data is a dict, assume it's Json content
43-
data = data["data"]["public_emdat"]["data"]
44+
# data = data["data"]["public_emdat"]["data"]
45+
data = self.source_data_validator(data["data"]["public_emdat"]["data"])
4446
df = pd.DataFrame(data)
4547
self.df = rename_columns(df)
4648
else:
4749
raise ValueError("Data must be either Excel content (str) or pandas DataFrame or Json")
4850

51+
def source_data_validator(self, data):
52+
valid_list = []
53+
error_list = []
54+
for item in data:
55+
if EmdatDataValidator.validate_event(item):
56+
valid_list.append(item)
57+
else:
58+
error_list.append(item)
59+
60+
return valid_list
61+
4962
def get_data(self) -> pd.DataFrame:
5063
return self.df
5164

pystac_monty/sources/gdacs.py

+20-1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
)
2323
from pystac_monty.hazard_profiles import MontyHazardProfiles
2424
from pystac_monty.sources.common import MontyDataSource, MontyDataTransformer
25+
from pystac_monty.validators.gdacs_events import GdacsDataValidatorEvents
26+
from pystac_monty.validators.gdacs_geometry import GdacsDataValidatorGeometry
2527

2628
# Constants
2729

@@ -46,7 +48,24 @@ def __init__(self, source_url: str, data: Any, type: GDACSDataSourceType):
4648
super().__init__(source_url, data)
4749
self.type = type
4850
# all gdacs data are json
49-
self.data = json.loads(data)
51+
self.data = self.source_data_validator(json.loads(data))
52+
53+
def source_data_validator(self, data: dict):
54+
# Pick the validator that matches the GDACS data source type
55+
if self.type == GDACSDataSourceType.EVENT:
56+
result = GdacsDataValidatorEvents.validate_event(data)
57+
if result:
58+
return data
59+
elif self.type == GDACSDataSourceType.GEOMETRY:
60+
new_data = {} # Store the filtered dictionary
61+
for key, value in data.items():
62+
if key == "features" and isinstance(value, list):
63+
# Validate each feature in the list and keep only the ones that pass validation
64+
new_data[key] = [feature for feature in value if GdacsDataValidatorGeometry.validate_event(feature)]
65+
else:
66+
# Keep normal key-value pairs unchanged
67+
new_data[key] = value
68+
return new_data
5069

5170
def get_type(self) -> GDACSDataSourceType:
5271
return self.type

pystac_monty/sources/gfd.py

+16-2
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
)
1616
from pystac_monty.hazard_profiles import MontyHazardProfiles
1717
from pystac_monty.sources.common import MontyDataSource, MontyDataTransformer
18+
from pystac_monty.validators.gfd import GFDSourceValidator
1819

1920
# Constants
2021

@@ -28,7 +29,20 @@ class GFDDataSource(MontyDataSource):
2829

2930
def __init__(self, source_url: str, data: Any):
3031
super().__init__(source_url, data)
31-
self.data = json.loads(data)
32+
self.data = self.source_data_validator(json.loads(data))
33+
34+
def source_data_validator(self, data: list[dict]):
35+
"""Validate the source data and collect only the success items"""
36+
# TODO Handle the failed_items
37+
failed_items = []
38+
success_items = []
39+
for item in data:
40+
is_valid = GFDSourceValidator.validate_event(item)
41+
if is_valid:
42+
success_items.append(item)
43+
else:
44+
failed_items.append(item)
45+
return success_items
3246

3347

3448
class GFDTransformer(MontyDataTransformer[GFDDataSource]):
@@ -85,7 +99,7 @@ def make_source_event_item(self, data: dict) -> Item:
8599
enddate = pytz.utc.localize(datetime.fromtimestamp(data["system:time_end"] / 1000))
86100

87101
item = Item(
88-
id=f'{STAC_EVENT_ID_PREFIX}{data["id"]}',
102+
id=f"{STAC_EVENT_ID_PREFIX}{data['id']}",
89103
geometry=geometry,
90104
bbox=bbox,
91105
datetime=startdate,

pystac_monty/sources/gidd.py

+16
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from pystac_monty.hazard_profiles import MontyHazardProfiles
1818
from pystac_monty.sources.common import MontyDataSource, MontyDataTransformer
1919
from pystac_monty.sources.utils import IDMCUtils
20+
from pystac_monty.validators.gidd import GiddValidator
2021

2122
logger = logging.getLogger(__name__)
2223

@@ -31,6 +32,21 @@ class GIDDDataSource(MontyDataSource):
3132
def __init__(self, source_url: str, data: Any):
3233
super().__init__(source_url, data)
3334
self.data = json.loads(data)
35+
self.data = self.source_data_validator(json.loads(data))
36+
37+
def source_data_validator(self, data: dict):
38+
"""Validate only the items inside 'features' while keeping other keys unchanged."""
39+
40+
new_data = {} # Store the filtered dictionary
41+
42+
for key, value in data.items():
43+
if key == "features" and isinstance(value, list):
44+
# Validate each feature in the list and skip the ones with 'Figure cause' = 'Conflict'
45+
new_data[key] = [feature for feature in value if GiddValidator.validate_event(feature)]
46+
else:
47+
# Keep normal key-value pairs unchanged
48+
new_data[key] = value
49+
return new_data
3450

3551

3652
class GIDDTransformer(MontyDataTransformer[GIDDDataSource]):

pystac_monty/sources/glide.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from pystac_monty.extension import HazardDetail, MontyEstimateType, MontyExtension
1010
from pystac_monty.hazard_profiles import MontyHazardProfiles
1111
from pystac_monty.sources.common import MontyDataSource, MontyDataTransformer
12+
from pystac_monty.validators.glide import GlideSetValidator
1213

1314
STAC_EVENT_ID_PREFIX = "glide-event-"
1415
STAC_HAZARD_ID_PREFIX = "glide-hazard-"
@@ -17,7 +18,19 @@
1718
class GlideDataSource(MontyDataSource):
1819
def __init__(self, source_url: str, data: Any):
1920
super().__init__(source_url, data)
20-
self.data = json.loads(data)
21+
self.data = self.source_data_validator(json.loads(data))
22+
23+
def source_data_validator(self, data: dict[dict]):
24+
"""Validate the source data and collect only the success items"""
25+
failed_items = []
26+
success_items = []
27+
for item in data["glideset"]:
28+
is_valid = GlideSetValidator.validate_event(item)
29+
if is_valid:
30+
success_items.append(item)
31+
else:
32+
failed_items.append(item)
33+
return {"glideset": success_items}
2134

2235

2336
class GlideTransformer(MontyDataTransformer[GlideDataSource]):

0 commit comments

Comments
 (0)