Skip to content
This repository has been archived by the owner on Aug 29, 2023. It is now read-only.

Commit

Permalink
Merge branch 'master' of https://github.com/CCI-Tools/cate into 832_d…
Browse files Browse the repository at this point in the history
…zelge_loading_sea_ice_data_failed
  • Loading branch information
dzelge committed Jan 18, 2019
2 parents 4284d05 + d5105d3 commit e109977
Show file tree
Hide file tree
Showing 18 changed files with 301 additions and 67 deletions.
9 changes: 8 additions & 1 deletion CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
## Version 2.0.0.dev24 (in development)
## Version 2.0.0.dev25 (in development)

* Increased the default time-out for data downloads from 10 to 90 seconds. Addresses (but does not fix)
[#835](https://github.com/CCI-Tools/cate/issues/835)
* Fixed installation problem with latest Miniconda 4.5.12
[#831](https://github.com/CCI-Tools/cate/issues/831)

## Version 2.0.0.dev24

* Loading SeaIce data throws a ValueError: The truth value of an array with more than one element is ambiguous.
[#832](https://github.com/CCI-Tools/cate/issues/832)
Expand Down
88 changes: 80 additions & 8 deletions cate/core/ds.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,11 @@
import datetime
import glob
import itertools
import logging
import re
from abc import ABCMeta, abstractmethod
from enum import Enum
from typing import Sequence, Optional, Union, Any, Dict, Set
from typing import Sequence, Optional, Union, Any, Dict, Set, List

import xarray as xr

Expand All @@ -105,6 +106,8 @@
r'(?::\d+)?' # optional port
r'(?:/?|[/?]\S+)$', re.IGNORECASE)

_LOG = logging.getLogger('cate')


class DataAccessWarning(UserWarning):
"""
Expand Down Expand Up @@ -314,23 +317,25 @@ def _repr_html_(self):
def _cannot_access_error(self, time_range=None, region=None, var_names=None,
verb="open", cause: BaseException = None, error_cls=DataAccessError):
error_message = f'Failed to {verb} data source "{self.id}"'
contraints = []
constraints = []
if time_range is not None and time_range != "":
contraints.append("time range")
constraints.append("time range")
if region is not None and region != "":
contraints.append("region")
constraints.append("region")
if var_names is not None and var_names != "":
contraints.append("variable names")
if contraints:
error_message += " for given " + ", ".join(contraints)
constraints.append("variable names")
if constraints:
error_message += " for given " + ", ".join(constraints)
if cause is not None:
error_message += f": {cause}"
_LOG.info(error_message)
return error_cls(error_message)

def _empty_error(self, time_range=None):
    """
    Build (and log) the error used when this data source yields no datasets.

    :param time_range: Optional time range constraint; when it is not None, its
           ``TimeRangeLike.format()`` representation is appended to the message.
    :return: A ``DataAccessError`` carrying the message. The error is returned, not raised.
    """
    message_parts = [f'Data source "{self.id}" does not seem to have any datasets']
    if time_range is not None:
        message_parts.append(f' in given time range {TimeRangeLike.format(time_range)}')
    error_message = ''.join(message_parts)
    # The message is logged at INFO level on the 'cate' logger before being handed back.
    _LOG.info(error_message)
    return DataAccessError(error_message)


Expand All @@ -348,6 +353,56 @@ class DataSourceStatus(Enum):
CANCELLED = "CANCELLED"


class DataStoreNotice:
    """
    A short notice that can be exposed to users by data stores.

    All fields are validated once in the constructor and are afterwards exposed
    through read-only properties and :meth:`to_dict`.
    """

    # Allowed values for *intent*. A tuple (not a set) is used on purpose: the
    # membership test then also works for unhashable values, which are reported
    # as ValueError like any other invalid intent instead of raising TypeError.
    _VALID_INTENTS = (None, "default", "primary", "success", "warning", "danger")

    def __init__(self, id: str, title: str, content: str,
                 intent: Optional[str] = None, icon: Optional[str] = None):
        """
        A short notice that can be exposed to users by data stores.

        :param id: Notice ID.
        :param title: A human-readable, plain text title.
        :param content: A human-readable text content that may be formatted using Markdown.
        :param intent: Notice intent, may be one of "default", "primary", "success", "warning", "danger"
               or None.
        :param icon: An optional icon name. See https://blueprintjs.com/docs/versions/1/#core/icons
        :raise ValueError: If *id*, *title* or *content* is None or empty, or if *intent* is not
               one of the allowed values.
        """
        # Checked in declaration order so the first offending argument is the one reported.
        for arg_name, arg_value in (("id", id), ("title", title), ("content", content)):
            if arg_value is None or arg_value == "":
                raise ValueError(f"invalid {arg_name}")
        if intent not in self._VALID_INTENTS:
            raise ValueError("invalid intent")

        self._dict = dict(id=id, title=title, content=content, icon=icon, intent=intent)

    @property
    def id(self):
        """The notice ID."""
        return self._dict["id"]

    @property
    def title(self):
        """The plain text title."""
        return self._dict["title"]

    @property
    def content(self):
        """The notice text, possibly Markdown-formatted."""
        return self._dict["content"]

    @property
    def intent(self):
        """The notice intent, or None if not given."""
        return self._dict["intent"]

    @property
    def icon(self):
        """The icon name, or None if not given."""
        return self._dict["icon"]

    def to_dict(self):
        """
        Return the notice fields as a new dictionary with the keys
        "id", "title", "content", "icon" and "intent".

        A copy is returned, so modifying it does not affect this notice.
        """
        return dict(self._dict)

    def __repr__(self):
        return (f"{type(self).__name__}(id={self.id!r}, title={self.title!r}, "
                f"content={self.content!r}, intent={self.intent!r}, icon={self.icon!r})")


class DataStore(metaclass=ABCMeta):
"""
Represents a data store of data sources.
Expand Down Expand Up @@ -375,6 +430,23 @@ def title(self) -> str:
"""
return self._title

@property
def description(self) -> Optional[str]:
    """
    Optional, human-readable description of this data store.

    The description is plain text that may contain Markdown formatting.
    This default implementation provides no description.

    :return: Always None here.
    """
    return None

@property
def notices(self) -> List[DataStoreNotice]:
    """
    Notices that inform users about this data store, e.g. about the conventions,
    standards, and data extent used in it, or about upcoming service outages.

    :return: A new, empty list; this default implementation has no notices.
    """
    return list()

@property
def is_local(self) -> bool:
"""
Expand Down Expand Up @@ -612,7 +684,7 @@ def open_xarray_dataset(paths,
var_names: VarNamesLike.TYPE = None,
monitor: Monitor = Monitor.NONE,
**kwargs) -> xr.Dataset:
"""
r"""
Open multiple files as a single dataset. This uses dask. If each individual file
of the dataset is small, one Dask chunk will coincide with one temporal slice,
e.g. the whole array in the file. Otherwise smaller dask chunks will be used
Expand Down
2 changes: 1 addition & 1 deletion cate/core/op.py
Original file line number Diff line number Diff line change
Expand Up @@ -681,7 +681,7 @@ def new_subprocess_op(op_meta_info: OpMetaInfo,
started: Union[str, Callable] = None,
progress: Union[str, Callable] = None,
done: Union[str, Callable] = None) -> Operation:
"""
r"""
Create an operation for a child program run in a new process.
:param op_meta_info: Meta-information about the resulting operation and the operation's inputs and outputs.
Expand Down
2 changes: 1 addition & 1 deletion cate/core/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -1092,7 +1092,7 @@ def __repr__(self):


class SubProcessStep(OpStepBase):
"""
r"""
A ``SubProcessStep`` is a step node that computes its output by a sub-process created from the
given *program*.
Expand Down
81 changes: 75 additions & 6 deletions cate/ds/esa_cci_odp.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
==========
"""
import json
import logging
import os
import re
import socket
Expand All @@ -47,7 +48,7 @@
from collections import OrderedDict
from datetime import datetime, timedelta
from math import ceil
from typing import Sequence, Tuple, Optional, Any, Dict
from typing import Sequence, Tuple, Optional, Any, Dict, List
from urllib.error import URLError, HTTPError

import pandas as pd
Expand All @@ -58,7 +59,7 @@
from cate.conf import get_config_value, get_data_stores_path
from cate.conf.defaults import NETCDF_COMPRESSION_LEVEL
from cate.core.ds import DATA_STORE_REGISTRY, DataAccessError, NetworkError, DataStore, DataSource, Schema, \
open_xarray_dataset
open_xarray_dataset, DataStoreNotice
from cate.core.opimpl import subset_spatial_impl, normalize_impl, adjust_spatial_attrs_impl
from cate.core.types import PolygonLike, TimeLike, TimeRange, TimeRangeLike, VarNamesLike
from cate.ds.local import add_to_data_store_registry, LocalDataSource, LocalDataStore
Expand Down Expand Up @@ -105,8 +106,10 @@
_CSW_METADATA_CACHE_FILE = 'catalogue_metadata.xml'
_CSW_CACHE_FILE = 'catalogue.xml'

_LOG = logging.getLogger('cate')

# Python sockets have no timeout by default; set a global default timeout (in seconds) for data downloads
socket.setdefaulttimeout(10)
socket.setdefaulttimeout(90)


def get_data_store_path():
Expand Down Expand Up @@ -332,6 +335,68 @@ def __init__(self,

self._csw_data = None

@property
def description(self) -> Optional[str]:
    """
    Human-readable description of the ESA CCI ODP data store.

    :return: Plain text using Markdown formatting, made of three paragraphs
             that are separated by single newline characters.
    """
    portal_paragraph = ("This data store represents the "
                        "[ESA CCI Open Data Portal](http://cci.esa.int/data) in the CCI Toolbox.")
    content_paragraph = ("It currently provides all CCI data that are published through the "
                         "[ESGF-CEDA services](https://esgf-index1.ceda.ac.uk/search/esacci-ceda/) "
                         "(gridded data stored as NetCDF files). "
                         "The store will be extended shortly to also provide TIFF and Shapefile Data, "
                         "see usage notes.")
    local_paragraph = "Remote data downloaded to your computer is made available through the *Local Data Store*."
    return "\n".join((portal_paragraph, content_paragraph, local_paragraph))

@property
def notices(self) -> List[DataStoreNotice]:
    """
    Return the notices for this data store that can be used to inform users about the
    conventions, standards, and data extent used in this data store.

    Two notices are provided: one clarifying the terminology used by the ODP versus
    this software, and one warning that only a subset of the ODP datasets is available.

    :return: A newly created list of notices on every access; never None.
    """
    return [
        DataStoreNotice("terminologyClarification",
                        "Terminology Clarification",
                        "The ESA CCI Open Data Portal (ODP) utilises an "
                        "[ontology](http://vocab-test.ceda.ac.uk/ontology/cci/cci-content/index.html) whose terms "
                        "might slightly differ from the ones used in this software."
                        "\n"
                        "For example, a *Dataset* in the CCI terminology may refer to all data products "
                        "generated by a certain CCI project using a specific configuration of algorithms "
                        "and auxiliary data."
                        "\n"
                        "In this software, a *Data Source* refers to a subset (a file set) "
                        "of a given ODP dataset whose data share a common spatio-temporal grid and/or share "
                        "other common properties, e.g. the instrument used for the original measurements."
                        "\n"
                        "In addition, Cate uses the term *Dataset* to represent in-memory "
                        "instances of gridded data sources or subsets of them.",
                        intent="primary",
                        icon="info-sign"),
        DataStoreNotice("dataCompleteness",
                        "Data Completeness",
                        "This data store currently provides **only a subset of all datasets** provided by the "
                        "ESA CCI Open Data Portal (ODP), namely gridded datasets originally stored in NetCDF "
                        "format."
                        "\n"
                        "In upcoming versions of Cate, the ODP data store will also allow for browsing "
                        "and accessing the remaining ODP datasets. This includes gridded data in TIFF format and "
                        "also vector data using ESRI Shapefile format."
                        "\n"
                        "For the time being users can download the missing vector data from the "
                        "[ODP FTP server](http://cci.esa.int/data#ftp) `ftp://anon-ftp.ceda.ac.uk/neodc/esacci/` "
                        "and then use operation `read_geo_data_frame()` in Cate to read the "
                        "downloaded Shapefiles:"
                        "\n"
                        "* CCI Glaciers in FTP directory `glaciers`\n"
                        "* CCI Ice Sheets in FTP directories `ice_sheets_antarctica` and `ice_sheets_greenland`\n",
                        intent="warning",
                        icon="warning-sign"),
    ]

@property
def index_cache_used(self):
return self._index_cache_used
Expand Down Expand Up @@ -857,8 +922,10 @@ def _make_local(self,
child_monitor.progress(work=20)

if var_names:
remote_dataset = remote_dataset.drop([var_name for var_name in remote_dataset.data_vars.keys()
if var_name not in var_names])
remote_dataset = remote_dataset.drop(
[var_name for var_name in remote_dataset.data_vars.keys()
if var_name not in var_names]
)
if region:
remote_dataset = normalize_impl(remote_dataset)
remote_dataset = subset_spatial_impl(remote_dataset, region)
Expand Down Expand Up @@ -927,7 +994,9 @@ def reporthook(block_number, read_size, total_file_size):

sub_monitor_msg = "file %d of %d" % (file_number, len(outdated_file_list))
with child_monitor.starting(sub_monitor_msg, file_size):
urllib.request.urlretrieve(url[protocol], filename=dataset_file, reporthook=reporthook)
actual_url = url[protocol]
_LOG.info(f"Downloading {actual_url} to {dataset_file}")
urllib.request.urlretrieve(actual_url, filename=dataset_file, reporthook=reporthook)
file_number += 1
local_ds.add_dataset(os.path.join(local_id, filename), (coverage_from, coverage_to))

Expand Down
38 changes: 35 additions & 3 deletions cate/ds/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,18 +47,18 @@
from collections import OrderedDict
from datetime import datetime
from glob import glob
from typing import Optional, Sequence, Union, Any, Tuple
from typing import Optional, Sequence, Union, Any, Tuple, List
from urllib.error import URLError, HTTPError

import psutil
import shapely.geometry
import xarray as xr
from dateutil import parser

from cate.conf import get_config_value, get_data_stores_path
from cate.conf import get_config_value, get_data_stores_path, GLOBAL_CONF_FILE
from cate.conf.defaults import NETCDF_COMPRESSION_LEVEL
from cate.core.ds import DATA_STORE_REGISTRY, DataAccessError, NetworkError, DataAccessWarning, DataSourceStatus, \
DataStore, DataSource, open_xarray_dataset
DataStore, DataSource, open_xarray_dataset, DataStoreNotice
from cate.core.opimpl import subset_spatial_impl, normalize_impl, adjust_spatial_attrs_impl
from cate.core.types import PolygonLike, TimeRange, TimeRangeLike, VarNames, VarNamesLike, ValidationError
from cate.util.monitor import Monitor
Expand Down Expand Up @@ -572,6 +572,38 @@ def __init__(self, ds_id: str, store_dir: str):
self._store_dir = store_dir
self._data_sources = None

@property
def description(self) -> Optional[str]:
    """
    Human-readable description of the local data store.

    :return: Plain text (Markdown formatting is permitted) consisting of two sentences.
    """
    sentences = (
        "The local data store represents all the data sources in your local file system known by Cate.",
        "It contains any downloaded remote data sources or files in your file system manually added.",
    )
    return " ".join(sentences)

@property
def notices(self) -> List[DataStoreNotice]:
    """
    Return the notices for this data store that can be used to inform users about the
    conventions, standards, and data extent used in this data store.

    A single notice is provided. Its text embeds the current result of
    ``get_data_stores_path()`` and the value of ``GLOBAL_CONF_FILE``, so it is
    rebuilt on every access.

    :return: A newly created list containing one notice; never None.
    """
    return [
        DataStoreNotice("localDataStorage",
                        "Local Data Storage",
                        "The local data store is currently configured to synchronize remote data in the "
                        f"`{get_data_stores_path()}`.\n"
                        "You can change this location either "
                        f"in Cate's configuration file `{GLOBAL_CONF_FILE}` "
                        "or in the user preference settings of Cate Desktop.\n"
                        "In order to keep your data, move your old directory to the new location, before "
                        "changing the location.",
                        intent="primary",
                        icon="info-sign"),
    ]

def add_pattern(self, data_source_id: str, files: Union[str, Sequence[str]] = None) -> 'DataSource':
data_source = self.create_data_source(data_source_id)
if isinstance(files, str) and len(files) > 0:
Expand Down
2 changes: 2 additions & 0 deletions cate/ops/animate.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,13 @@
Supported formats: html
"""

import os

# import matplotlib
# noinspection PyBroadException
# try:
# import matplotlib
# matplotlib.use('Qt5Agg')
# has_qt5agg = True
# except Exception:
Expand Down
2 changes: 1 addition & 1 deletion cate/util/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def _cancel(process: subprocess.Popen, kill_on_cancel: bool):


class ProcessOutputMonitor:
"""
r"""
A stdout handler for :py:func:`execute` the delegates extracted progress information to a monitor.
Information is extracted using regular expressions or a callable that extracts the information.
Expand Down
2 changes: 1 addition & 1 deletion cate/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
# SOFTWARE.

# Cate version string (PEP440-compatible), e.g. "0.8.0", "0.8.0.dev1", "0.8.0rc1", "0.8.0rc1.dev1"
__version__ = '2.0.0.dev24'
__version__ = '2.0.0.dev25'

# Other package metainfo
__title__ = 'cate'
Expand Down
Loading

0 comments on commit e109977

Please sign in to comment.