Skip to content

Commit

Permalink
Synchronize docs between DataCube, VectorCube, ...
Browse files Browse the repository at this point in the history
use tests to enforce keeping docs in sync

related to #402/#720/#725
  • Loading branch information
soxofaan committed Feb 11, 2025
1 parent 9d96665 commit 62f2221
Show file tree
Hide file tree
Showing 6 changed files with 199 additions and 36 deletions.
39 changes: 38 additions & 1 deletion openeo/internal/documentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@

import collections
import inspect
import re
import textwrap
from functools import partial
from typing import Callable, Optional, Tuple, TypeVar
from typing import Callable, Dict, Optional, Tuple, TypeVar, Union

# TODO: give this a proper public API?
_process_registry = collections.defaultdict(list)
Expand Down Expand Up @@ -58,3 +59,39 @@ def decorate(f: Callable) -> Callable:
return f

return decorate


def _get_doc(obj: Union[str, Callable]) -> str:
"""
Get docstring of a method or function.
"""
if isinstance(obj, str):
doc = obj
else:
doc = obj.__doc__
return textwrap.dedent(doc)


def extract_params(doc: Union[str, Callable]) -> Dict[str, str]:
    """
    Extract parameters (``:param name:`` format) from a docstring.

    :param doc: docstring (as string), or a function/method to take the docstring from.
    :return: mapping of parameter name to (stripped) parameter description.
    """
    text = _get_doc(doc)
    # A parameter entry starts with `:param name:` at the beginning of a line;
    # subsequent indented lines are continuations of the same description.
    pattern = re.compile(r"^:param\s+(?P<param>\w+)\s*:(?P<doc>.*(\n +.*)*)", re.MULTILINE)
    params = {}
    for match in pattern.finditer(text):
        params[match.group("param")] = match.group("doc").strip()
    return params


def assert_same_param_docs(doc_a: Union[str, Callable], doc_b: Union[str, Callable], only_intersection: bool = False):
    """
    Compare parameters (``:param name:`` format) extracted from two docstrings
    and assert that they are documented the same way.

    :param doc_a: first docstring (as string, or function/method to take it from).
    :param doc_b: second docstring (as string, or function/method to take it from).
    :param only_intersection: when set, only compare parameters that are documented in both docstrings.
    """
    # TODO: option to also check order?
    params_a, params_b = extract_params(doc_a), extract_params(doc_b)

    if only_intersection:
        # Restrict comparison to the parameter names both docstrings share.
        shared = params_a.keys() & params_b.keys()
        params_a = {name: desc for name, desc in params_a.items() if name in shared}
        params_b = {name: desc for name, desc in params_b.items() if name in shared}

    assert params_a == params_b
3 changes: 2 additions & 1 deletion openeo/rest/datacube.py
Original file line number Diff line number Diff line change
Expand Up @@ -2549,6 +2549,7 @@ def execute_batch(
One of "error" (highest severity), "warning", "info", and "debug" (lowest severity).
:param max_poll_interval: maximum number of seconds to sleep between job status polls
:param connection_retry_interval: how long to wait when status poll failed due to connection issue
:param print: print/logging function to show progress/status
.. versionchanged:: 0.32.0
Added ``auto_add_save_result`` option
Expand Down Expand Up @@ -2641,7 +2642,7 @@ def create_job(
:param log_level: Optional minimum severity level for log entries that the back-end should keep track of.
One of "error" (highest severity), "warning", "info", and "debug" (lowest severity).
:return: Created job.
:return: Handle for the job created at the backend.
.. versionchanged:: 0.32.0
Added ``auto_add_save_result`` option
Expand Down
49 changes: 35 additions & 14 deletions openeo/rest/mlmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,15 +79,28 @@ def execute_batch(
log_level: Optional[str] = None,
) -> BatchJob:
"""
Evaluate the process graph by creating a batch job, and retrieving the results when it is finished.
This method is mostly recommended if the batch job is expected to run in a reasonable amount of time.
For very long-running jobs, you probably do not want to keep the client running.
:param job_options:
:param outputfile: The path of a file to which a result can be written
:param out_format: (optional) Format of the job result.
:param format_options: String Parameters for the job result format
Execute the underlying process graph at the backend in batch job mode:
- create the job (like :py:meth:`create_job`)
- start the job (like :py:meth:`BatchJob.start() <openeo.rest.job.BatchJob.start>`)
- track the job's progress with an active polling loop
(like :py:meth:`BatchJob.run_synchronous() <openeo.rest.job.BatchJob.run_synchronous>`)
- optionally (if ``outputfile`` is specified) download the job's results
when the job finished successfully
.. note::
Because of the active polling loop,
which blocks any further progress of your script or application,
this :py:meth:`execute_batch` method is mainly recommended
for batch jobs that are expected to complete
in a time that is reasonable for your use case.
:param outputfile: Optional, output path to download to.
:param title: job title.
:param description: job description.
:param plan: The billing plan to process and charge the job with
:param budget: Maximum budget to be spent on executing the job.
Note that some backends do not honor this limit.
:param additional: additional (top-level) properties to set in the request body
:param job_options: dictionary of job options to pass to the backend
(under top-level property "job_options")
Expand All @@ -96,6 +109,7 @@ def execute_batch(
One of "error" (highest severity), "warning", "info", and "debug" (lowest severity).
:param max_poll_interval: maximum number of seconds to sleep between job status polls
:param connection_retry_interval: how long to wait when status poll failed due to connection issue
:param print: print/logging function to show progress/status
.. versionchanged:: 0.36.0
Added argument ``additional``.
Expand Down Expand Up @@ -136,17 +150,24 @@ def create_job(
log_level: Optional[str] = None,
) -> BatchJob:
"""
Sends a job to the backend and returns a ClientJob instance.
Send the underlying process graph to the backend
to create an openEO batch job
and return a corresponding :py:class:`~openeo.rest.job.BatchJob` instance.
:param title: job title
:param description: job description
:param plan: The billing plan to process and charge the job with
Note that this method only *creates* the openEO batch job at the backend,
but it does not *start* it.
Use :py:meth:`execute_batch` instead to let the openEO Python client
take care of the full job life cycle: create, start and track its progress until completion.
:param title: job title.
:param description: job description.
:param plan: The billing plan to process and charge the job with.
:param budget: Maximum budget to be spent on executing the job.
Note that some backends do not honor this limit.
:param additional: additional (top-level) properties to set in the request body
:param job_options: dictionary of job options to pass to the backend
(under top-level property "job_options")
:param format_options: String Parameters for the job result format
:param log_level: Optional minimum severity level for log entries that the back-end should keep track of.
One of "error" (highest severity), "warning", "info", and "debug" (lowest severity).
:return: Created job.
Expand Down
71 changes: 51 additions & 20 deletions openeo/rest/vectorcube.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
from openeo.rest.job import BatchJob
from openeo.rest.mlmodel import MlModel
from openeo.rest.result import SaveResult
from openeo.rest.stac_resource import StacResource
from openeo.util import InvalidBBoxException, dict_no_none, guess_format, to_bbox_dict

if typing.TYPE_CHECKING:
Expand Down Expand Up @@ -218,25 +217,33 @@ def download(
*,
validate: Optional[bool] = None,
auto_add_save_result: bool = True,
additional: Optional[dict] = None,
job_options: Optional[dict] = None,
) -> Union[None, bytes]:
"""
Execute synchronously and download the vector cube.
The result will be stored to the output path, when specified.
If no output path (or ``None``) is given, the raw download content will be returned as ``bytes`` object.
:param outputfile: (optional) output file to store the result to
:param format: (optional) output format to use.
:param options: (optional) additional output format options.
:param outputfile: Optional, output path to download to.
:param format: Optional, an output format supported by the backend.
:param options: Optional, file format options
:param validate: Optional toggle to enable/prevent validation of the process graphs before execution
(overruling the connection's ``auto_validate`` setting).
:param auto_add_save_result: Automatically add a ``save_result`` node to the process graph if there is none yet.
:param additional: additional (top-level) properties to set in the request body
:param job_options: dictionary of job options to pass to the backend
(under top-level property "job_options")
.. versionchanged:: 0.21.0
When not specified explicitly, output format is guessed from output file extension.
.. versionchanged:: 0.32.0
Added ``auto_add_save_result`` option
.. versionchanged:: 0.39.0
Added arguments ``additional`` and ``job_options``.
"""
# TODO #278 centralize download/create_job/execute_job logic in DataCube, VectorCube, MlModel, ...
if auto_add_save_result:
Expand All @@ -250,7 +257,9 @@ def download(
)
else:
res = self
return self._connection.download(res.flat_graph(), outputfile=outputfile, validate=validate)
return self._connection.download(
res.flat_graph(), outputfile=outputfile, validate=validate, additional=additional, job_options=job_options
)

def execute_batch(
self,
Expand All @@ -274,17 +283,32 @@ def execute_batch(
**format_options,
) -> BatchJob:
"""
Evaluate the process graph by creating a batch job, and retrieving the results when it is finished.
This method is mostly recommended if the batch job is expected to run in a reasonable amount of time.
For very long running jobs, you probably do not want to keep the client running.
Execute the underlying process graph at the backend in batch job mode:
- create the job (like :py:meth:`create_job`)
- start the job (like :py:meth:`BatchJob.start() <openeo.rest.job.BatchJob.start>`)
- track the job's progress with an active polling loop
(like :py:meth:`BatchJob.run_synchronous() <openeo.rest.job.BatchJob.run_synchronous>`)
- optionally (if ``outputfile`` is specified) download the job's results
when the job finished successfully
.. note::
Because of the active polling loop,
which blocks any further progress of your script or application,
this :py:meth:`execute_batch` method is mainly recommended
for batch jobs that are expected to complete
in a time that is reasonable for your use case.
:param outputfile: Optional, output path to download to.
:param out_format: (optional) File format to use for the job result.
:param title: job title.
:param description: job description.
:param plan: The billing plan to process and charge the job with
:param budget: Maximum budget to be spent on executing the job.
Note that some backends do not honor this limit.
:param additional: additional (top-level) properties to set in the request body
:param job_options: dictionary of job options to pass to the backend
(under top-level property "job_options")
:param outputfile: The path of a file to which a result can be written
:param out_format: (optional) output format to use.
:param format_options: (optional) additional output format options
:param validate: Optional toggle to enable/prevent validation of the process graphs before execution
(overruling the connection's ``auto_validate`` setting).
:param auto_add_save_result: Automatically add a ``save_result`` node to the process graph if there is none yet.
Expand All @@ -293,6 +317,7 @@ def execute_batch(
One of "error" (highest severity), "warning", "info", and "debug" (lowest severity).
:param max_poll_interval: maximum number of seconds to sleep between job status polls
:param connection_retry_interval: how long to wait when status poll failed due to connection issue
:param print: print/logging function to show progress/status
.. versionchanged:: 0.21.0
When not specified explicitly, output format is guessed from output file extension.
Expand Down Expand Up @@ -359,18 +384,24 @@ def create_job(
**format_options,
) -> BatchJob:
"""
Sends a job to the backend and returns a ClientJob instance.
:param out_format: String Format of the job result.
:param title: job title
:param description: job description
:param plan: The billing plan to process and charge the job with
Send the underlying process graph to the backend
to create an openEO batch job
and return a corresponding :py:class:`~openeo.rest.job.BatchJob` instance.
Note that this method only *creates* the openEO batch job at the backend,
but it does not *start* it.
Use :py:meth:`execute_batch` instead to let the openEO Python client
take care of the full job life cycle: create, start and track its progress until completion.
:param out_format: output file format.
:param title: job title.
:param description: job description.
:param plan: The billing plan to process and charge the job with.
:param budget: Maximum budget to be spent on executing the job.
Note that some backends do not honor this limit.
:param additional: additional (top-level) properties to set in the request body
:param job_options: dictionary of job options to pass to the backend
(under top-level property "job_options")
:param format_options: String Parameters for the job result format
:param validate: Optional toggle to enable/prevent validation of the process graphs before execution
(overruling the connection's ``auto_validate`` setting).
:param auto_add_save_result: Automatically add a ``save_result`` node to the process graph if there is none yet.
Expand Down
5 changes: 5 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@

import pytest

pytest.register_assert_rewrite(
"openeo.internal.documentation",
)


from openeo.testing import TestDataLoader
from openeo.util import ensure_dir

Expand Down
68 changes: 68 additions & 0 deletions tests/internal/test_documentation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import pytest

from openeo import DataCube, VectorCube
from openeo.internal.documentation import assert_same_param_docs, extract_params
from openeo.rest.mlmodel import MlModel
from openeo.rest.stac_resource import StacResource


def test_extract_params():
    """Basic `extract_params` usage: single-line and multi-line `:param` entries."""
    # Note: the multi-line description of `b_b` keeps its relative (4 space) indentation
    # after the docstring is dedented inside `extract_params`.
    assert (
        extract_params(
            """
            The description
            and more
            :param a: description of a
            :param b_b : multi-line description
                of b
            That's it!
            """
        )
        == {
            "a": "description of a",
            "b_b": "multi-line description\n    of b",
        }
    )


def test_compare_param_docs_datacube():
    """Shared parameters of DataCube.download and the job-based execution methods must be documented identically."""
    for job_method in [DataCube.create_job, DataCube.execute_batch]:
        assert_same_param_docs(DataCube.download, job_method, only_intersection=True)


@pytest.mark.parametrize(
    ["method_a", "method_b"],
    [
        (DataCube.download, VectorCube.download),
        (DataCube.create_job, VectorCube.create_job),
        (DataCube.execute_batch, VectorCube.execute_batch),
    ],
)
def test_compare_docs_datacube_vectorcube(method_a, method_b):
    """Corresponding DataCube/VectorCube methods must document all their parameters identically."""
    # `only_intersection=False`: the full parameter docs must match, not just shared names.
    assert_same_param_docs(method_a, method_b, only_intersection=False)


@pytest.mark.parametrize(
    ["method_a", "method_b"],
    [
        (DataCube.create_job, MlModel.create_job),
        (DataCube.execute_batch, MlModel.execute_batch),
    ],
)
def test_compare_docs_datacube_mlmodel(method_a, method_b):
    """Parameters shared between DataCube and MlModel job methods must be documented identically."""
    # `only_intersection=True`: MlModel methods don't expose the full DataCube parameter set.
    assert_same_param_docs(method_a, method_b, only_intersection=True)


@pytest.mark.parametrize(
    ["method_a", "method_b"],
    [
        (DataCube.download, StacResource.download),
        (DataCube.create_job, StacResource.create_job),
        (DataCube.execute_batch, StacResource.execute_batch),
    ],
)
def test_compare_docs_datacube_stac_resource(method_a, method_b):
    """Parameters shared between DataCube and StacResource methods must be documented identically."""
    # `only_intersection=True`: StacResource methods don't expose the full DataCube parameter set.
    assert_same_param_docs(method_a, method_b, only_intersection=True)

0 comments on commit 62f2221

Please sign in to comment.