Skip to content

Commit 855a4ca

Browse files
committed
PR #630 improve save_result handling in VectorCube too (#623, #401, #583, #391)
1 parent deaa8b1 commit 855a4ca

File tree

6 files changed

+208
-96
lines changed

6 files changed

+208
-96
lines changed

openeo/rest/_datacube.py

+35
Original file line numberDiff line numberDiff line change
@@ -319,3 +319,38 @@ def build_child_callback(
319319
raise ValueError(process)
320320

321321
return PGNode.to_process_graph_argument(pg)
322+
323+
324+
def _ensure_save_result(
    cube: _ProcessGraphAbstraction,
    *,
    format: Optional[str] = None,
    options: Optional[dict] = None,
    weak_format: Optional[str] = None,
    default_format: str,
    method: str,
) -> _ProcessGraphAbstraction:
    """
    Guarantee that the process graph of ``cube`` contains a `save_result` node.

    If no `save_result` node is found, one is added through ``cube.save_result()``,
    with the format picked in this order of precedence:
    ``format``, then ``weak_format``, then ``default_format``.
    If the graph already has `save_result` node(s), the cube is returned as is,
    unless an explicit ``format`` or ``options`` is given as well:
    that combination is rejected as ambiguous.

    :param cube: cube whose process graph is inspected
    :param format: (optional) desired `save_result` file format
    :param options: (optional) desired `save_result` file format parameters
    :param weak_format: (optional) weak format indicator guessed from file name,
        only used when ``format`` is not given
    :param default_format: default format for data type to use when no format is specified by user
    :param method: name of the calling method, only used to build the error message
    :return: the given cube if it already has a `save_result` node,
        otherwise the result of ``cube.save_result(...)``
    :raises OpenEoClientException: when ``format`` or ``options`` is given
        while the process graph already has `save_result` node(s)
    """
    # TODO #278 instead of standalone helper function, move this to common base class for raster cubes, vector cubes, ...
    has_save_result = any(node.process_id == "save_result" for node in cube.result_node().walk_nodes())

    if has_save_result:
        if format or options:
            # Explicit output settings would compete with the existing node(s): refuse instead of guessing.
            raise OpenEoClientException(
                f"{method} with explicit output {'format' if format else 'options'} {format or options!r},"
                f" but the process graph already has `save_result` node(s)"
                f" which is ambiguous and should not be combined."
            )
        return cube

    # No `save_result` node yet: automatically add it.
    # TODO: the `save_result` method is not defined on _ProcessGraphAbstraction, but it is on DataCube and VectorCube
    effective_format = format or weak_format or default_format
    return cube.save_result(format=effective_format, options=options)

openeo/rest/datacube.py

+24-42
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
from openeo.rest._datacube import (
4747
THIS,
4848
UDF,
49+
_ensure_save_result,
4950
_ProcessGraphAbstraction,
5051
build_child_callback,
5152
)
@@ -2095,39 +2096,6 @@ def save_result(
20952096
}
20962097
)
20972098

2098-
def _ensure_save_result(
2099-
self,
2100-
*,
2101-
format: Optional[str] = None,
2102-
options: Optional[dict] = None,
2103-
weak_format: Optional[str] = None,
2104-
method: str,
2105-
) -> DataCube:
2106-
"""
2107-
Make sure there is a (final) `save_result` node in the process graph.
2108-
If there is already one: check if it is consistent with the given format/options (if any)
2109-
and add a new one otherwise.
2110-
2111-
:param format: (optional) desired `save_result` file format
2112-
:param options: (optional) desired `save_result` file format parameters
2113-
:return:
2114-
"""
2115-
# TODO #401 Unify with VectorCube._ensure_save_result and move to generic data cube parent class (not only for raster cubes, but also vector cubes)
2116-
save_result_nodes = [n for n in self.result_node().walk_nodes() if n.process_id == "save_result"]
2117-
2118-
cube = self
2119-
if not save_result_nodes:
2120-
# No `save_result` node yet: automatically add it.
2121-
cube = cube.save_result(format=format or weak_format or self._DEFAULT_RASTER_FORMAT, options=options)
2122-
elif format or options:
2123-
raise OpenEoClientException(
2124-
f"{method} with explicit output {'format' if format else 'options'} {format or options!r},"
2125-
f" but the process graph already has `save_result` node(s)"
2126-
f" which is ambiguous and should not be combined."
2127-
)
2128-
2129-
return cube
2130-
21312099
def download(
21322100
self,
21332101
outputfile: Optional[Union[str, pathlib.Path]] = None,
@@ -2149,9 +2117,14 @@ def download(
21492117
(overruling the connection's ``auto_validate`` setting).
21502118
:return: None if the result is stored to disk, or a bytes object returned by the backend.
21512119
"""
2152-
weak_format = guess_format(outputfile) if outputfile else None
2153-
cube = self._ensure_save_result(
2154-
format=format, options=options, weak_format=weak_format, method="DataCube.download()"
2120+
# TODO #278 centralize download/create_job/execute_job logic in DataCube, VectorCube, MlModel, ...
2121+
cube = _ensure_save_result(
2122+
cube=self,
2123+
format=format,
2124+
options=options,
2125+
weak_format=guess_format(outputfile) if outputfile else None,
2126+
default_format=self._DEFAULT_RASTER_FORMAT,
2127+
method="DataCube.download()",
21552128
)
21562129
return self._connection.download(cube.flat_graph(), outputfile, validate=validate)
21572130

@@ -2274,9 +2247,14 @@ def execute_batch(
22742247
if "format" in format_options and not out_format:
22752248
out_format = format_options["format"] # align with 'download' call arg name
22762249

2277-
weak_format = guess_format(outputfile) if outputfile else None
2278-
cube = self._ensure_save_result(
2279-
format=out_format, options=format_options, weak_format=weak_format, method="DataCube.execute_batch()"
2250+
# TODO #278 centralize download/create_job/execute_job logic in DataCube, VectorCube, MlModel, ...
2251+
cube = _ensure_save_result(
2252+
cube=self,
2253+
format=out_format,
2254+
options=format_options,
2255+
weak_format=guess_format(outputfile) if outputfile else None,
2256+
default_format=self._DEFAULT_RASTER_FORMAT,
2257+
method="DataCube.execute_batch()",
22802258
)
22812259

22822260
job = cube.create_job(job_options=job_options, validate=validate)
@@ -2320,9 +2298,13 @@ def create_job(
23202298
"""
23212299
# TODO: add option to also automatically start the job?
23222300
# TODO: avoid using all kwargs as format_options
2323-
# TODO: centralize `create_job` for `DataCube`, `VectorCube`, `MlModel`, ...
2324-
cube = self._ensure_save_result(
2325-
format=out_format, options=format_options or None, method="DataCube.create_job()"
2301+
# TODO #278 centralize download/create_job/execute_job logic in DataCube, VectorCube, MlModel, ...
2302+
cube = _ensure_save_result(
2303+
cube=self,
2304+
format=out_format,
2305+
options=format_options or None,
2306+
default_format=self._DEFAULT_RASTER_FORMAT,
2307+
method="DataCube.create_job()",
23262308
)
23272309
return self._connection.create_job(
23282310
process_graph=cube.flat_graph(),

openeo/rest/vectorcube.py

+29-44
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from openeo.rest._datacube import (
1717
THIS,
1818
UDF,
19+
_ensure_save_result,
1920
_ProcessGraphAbstraction,
2021
build_child_callback,
2122
)
@@ -37,6 +38,8 @@ class VectorCube(_ProcessGraphAbstraction):
3738
A geometry is specified in a 'coordinate reference system'. https://www.w3.org/TR/sdw-bp/#dfn-coordinate-reference-system-(crs)
3839
"""
3940

41+
_DEFAULT_VECTOR_FORMAT = "GeoJSON"
42+
4043
def __init__(self, graph: PGNode, connection: Connection, metadata: Optional[CubeMetadata] = None):
4144
super().__init__(pgnode=graph, connection=connection)
4245
self.metadata = metadata
@@ -195,38 +198,6 @@ def save_result(self, format: Union[str, None] = "GeoJSON", options: dict = None
195198
},
196199
)
197200

198-
def _ensure_save_result(
199-
self,
200-
format: Optional[str] = None,
201-
options: Optional[dict] = None,
202-
) -> VectorCube:
203-
"""
204-
Make sure there is a (final) `save_result` node in the process graph.
205-
If there is already one: check if it is consistent with the given format/options (if any)
206-
and add a new one otherwise.
207-
208-
:param format: (optional) desired `save_result` file format
209-
:param options: (optional) desired `save_result` file format parameters
210-
:return:
211-
"""
212-
# TODO #401 Unify with DataCube._ensure_save_result and move to generic data cube parent class
213-
result_node = self.result_node()
214-
if result_node.process_id == "save_result":
215-
# There is already a `save_result` node:
216-
# check if it is consistent with given format/options (if any)
217-
args = result_node.arguments
218-
if format is not None and format.lower() != args["format"].lower():
219-
raise ValueError(f"Existing `save_result` node with different format {args['format']!r} != {format!r}")
220-
if options is not None and options != args["options"]:
221-
raise ValueError(
222-
f"Existing `save_result` node with different options {args['options']!r} != {options!r}"
223-
)
224-
cube = self
225-
else:
226-
# No `save_result` node yet: automatically add it.
227-
cube = self.save_result(format=format or "GeoJSON", options=options)
228-
return cube
229-
230201
def execute(self, *, validate: Optional[bool] = None) -> dict:
231202
"""Executes the process graph."""
232203
return self._connection.execute(self.flat_graph(), validate=validate)
@@ -255,11 +226,15 @@ def download(
255226
When not specified explicitly, output format is guessed from output file extension.
256227
257228
"""
258-
# TODO #401 make outputfile optional (See DataCube.download)
259-
# TODO #401/#449 don't guess/override format if there is already a save_result with format?
260-
if format is None and outputfile:
261-
format = guess_format(outputfile)
262-
cube = self._ensure_save_result(format=format, options=options)
229+
# TODO #278 centralize download/create_job/execute_job logic in DataCube, VectorCube, MlModel, ...
230+
cube = _ensure_save_result(
231+
cube=self,
232+
format=format,
233+
options=options,
234+
weak_format=guess_format(outputfile) if outputfile else None,
235+
default_format=self._DEFAULT_VECTOR_FORMAT,
236+
method="VectorCube.download()",
237+
)
263238
return self._connection.download(cube.flat_graph(), outputfile=outputfile, validate=validate)
264239

265240
def execute_batch(
@@ -291,11 +266,15 @@ def execute_batch(
291266
.. versionchanged:: 0.21.0
292267
When not specified explicitly, output format is guessed from output file extension.
293268
"""
294-
if out_format is None and outputfile:
295-
# TODO #401/#449 don't guess/override format if there is already a save_result with format?
296-
out_format = guess_format(outputfile)
297-
298-
job = self.create_job(out_format, job_options=job_options, validate=validate, **format_options)
269+
cube = _ensure_save_result(
270+
cube=self,
271+
format=out_format,
272+
options=format_options,
273+
weak_format=guess_format(outputfile) if outputfile else None,
274+
default_format=self._DEFAULT_VECTOR_FORMAT,
275+
method="VectorCube.execute_batch()",
276+
)
277+
job = cube.create_job(job_options=job_options, validate=validate)
299278
return job.run_synchronous(
300279
# TODO #135 support multi file result sets too
301280
outputfile=outputfile,
@@ -331,8 +310,14 @@ def create_job(
331310
:return: Created job.
332311
"""
333312
# TODO: avoid using all kwargs as format_options
334-
# TODO: centralize `create_job` for `DataCube`, `VectorCube`, `MlModel`, ...
335-
cube = self._ensure_save_result(format=out_format, options=format_options or None)
313+
# TODO #278 centralize download/create_job/execute_job logic in DataCube, VectorCube, MlModel, ...
314+
cube = _ensure_save_result(
315+
cube=self,
316+
format=out_format,
317+
options=format_options or None,
318+
default_format=self._DEFAULT_VECTOR_FORMAT,
319+
method="VectorCube.create_job()",
320+
)
336321
return self._connection.create_job(
337322
process_graph=cube.flat_graph(),
338323
title=title,

openeo/util.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -416,12 +416,15 @@ def deep_set(data: dict, *keys, value):
416416
raise ValueError("No keys given")
417417

418418

419-
def guess_format(filename: Union[str, Path]) -> str:
419+
def guess_format(filename: Union[str, Path]) -> Union[str, None]:
420420
"""
421421
Guess the output format from a given filename and return the corrected format.
422422
Any names not in the dict get passed through.
423423
"""
424-
extension = str(filename).rsplit(".", 1)[-1].lower()
424+
extension = Path(filename).suffix
425+
if not extension:
426+
return None
427+
extension = extension[1:].lower()
425428

426429
format_map = {
427430
"gtiff": "GTiff",

0 commit comments

Comments
 (0)